def train(display_on, speed, params):
    pygame.init()
    pygame.font.init()
    agent = DQNAgent(params)
    counter_games = 0
    high_score = 0
    # score_plot = []
    # counter_plot = []
    while counter_games < params['episodes']:
        game = Game(440, 440, high_score)
        if display_on:
            game.update_display()
        while not game.crash:
            if handle_game_event(game):
                return
            # agent.epsilon is set to give randomness to actions
            agent.epsilon = 1 - (counter_games * params['epsilon_decay_linear'])
            state = game.get_state()
            move = get_move(agent, state)
            game.do_move(move)
            new_state = game.get_state()
            agent.set_reward(game.crash, game.player.eaten)
            # train short memory based on the new action and state
            agent.train_short_memory(state, move, new_state, game.crash)
            # store the new data into long-term memory
            agent.remember(state, move, new_state, game.crash)
            if display_on:
                game.update_display()
                pygame.time.wait(speed)
        counter_games += 1
        print(f'Game {counter_games} Score: {game.score}')
        high_score = game.high_score
        agent.replay_new(agent.memory, params['batch_size'])
    agent.model.save_weights(params['weights_path'])
    pygame.quit()
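# Note: the train() variant above delegates epsilon-greedy action selection to a get_move()
# helper that is not shown in this listing. A minimal sketch of such a helper, assuming the
# same conventions as the other variants (a Keras model on the agent, a flat numpy state
# vector, three one-hot encoded actions); the helper name and action count are illustrative,
# not confirmed by the original source.
def get_move(agent, state):
    # With probability epsilon pick a random action, otherwise act greedily on the predicted Q-values.
    if random.uniform(0, 1) < agent.epsilon:
        return to_categorical(randint(0, 2), num_classes=3)
    prediction = agent.model.predict(state.reshape((1, -1)))
    return to_categorical(np.argmax(prediction[0]), num_classes=3)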
def run():
    pygame.init()
    agent = DQNAgent()
    counter_games = 0
    score_plot = []
    counter_plot = []
    record = 0
    while counter_games < 100:
        game = Game(width, height)
        pipe = game.pipe
        player = game.player
        while not game.crash:
            agent.epsilon = 60 - counter_games
            state_old = agent.get_state(game, player, pipe)
            if randint(0, 200) < agent.epsilon:
                final_move = to_categorical(randint(0, 1), num_classes=2)
            else:
                # predict action based on the old state
                prediction = agent.model.predict(state_old.reshape((1, 6)))
                final_move = to_categorical(np.argmax(prediction[0]), num_classes=2)
            player.Move(game, final_move)
            pipe.move_pipe(160)
            # check_collision(player, pipe)
            check_score(pipe, game)
            reward = agent.set_reward(pipe, game.crash)
            state_new = agent.get_state(game, player, pipe)
            agent.train_short_memory(state_old, final_move, reward, state_new, game.crash)
            # store the new data into long-term memory
            agent.remember(state_old, final_move, reward, state_new, game.crash)
            display(game, player, pipe, record)
            if game.score == 20:
                game.crash = True
        agent.replay_new(agent.memory)
        counter_games += 1
        print('Game', counter_games, ' Score:', game.score)
        score_plot.append(game.score)
        counter_plot.append(counter_games)
        record = get_record(game.score, record)
    agent.model.save_weights('weights.hdf5')
    plot_seaborn(counter_plot, score_plot)
    pygame.quit()
def run():
    agent = DQNAgent(size)
    counter_games = 0
    score_plot = []
    counter_plot = []
    record = 0
    while counter_games < games:
        # Initialize classes
        game = Game(size, size)
        player1 = game.player
        food1 = game.food
        # Perform first move
        initialize_game(player1, game, food1, agent)
        if display_option:
            display(player1, food1, game, record)
        while not game.crash:
            # agent.epsilon is set to give randomness to actions
            agent.epsilon = (games * 0.4) - counter_games
            # get old state
            state_old = agent.get_state(game, player1, food1)
            # perform random actions based on agent.epsilon, or choose the action
            if randint(0, games) < agent.epsilon:
                final_move = to_categorical(randint(0, 2), num_classes=3)
            else:
                # predict action based on the old state
                prediction = agent.model.predict(state_old.reshape((1, agent.size)))
                final_move = to_categorical(np.argmax(prediction[0]), num_classes=3)
            # perform new move and get new state
            player1.do_move(final_move, player1.x, player1.y, game, food1, agent)
            state_new = agent.get_state(game, player1, food1)
            # set reward for the new state
            reward = agent.set_reward(player1, game.crash)
            # train short memory based on the new action and state
            agent.train_short_memory(state_old, final_move, reward, state_new, game.crash)
            # store the new data into long-term memory
            agent.remember(state_old, final_move, reward, state_new, game.crash)
            record = get_record(game.score, record)
            if display_option:
                display(player1, food1, game, record)
                pygame.time.wait(speed)
        agent.replay_new(agent.memory)
        counter_games += 1
        score_plot.append(game.score)
        counter_plot.append(counter_games)
        print('Game', counter_games, ' Score:', game.score, 'Last 10 Avg:', np.mean(score_plot[-10:]))
    agent.model.save_weights('weights.hdf5')
    plot_seaborn(counter_plot, score_plot)
def run():
    pygame.init()
    agent = DQNAgent()
    counter_games = 0
    score_plot = []
    counter_plot = []
    record = 0
    while counter_games < 150:
        # Initialize classes
        game = Game(440, 440)
        player1 = game.player
        food1 = game.food
        # Perform first move
        initialize_game(player1, game, food1, agent)
        if display_option:
            display(player1, food1, game, record)
        while not game.crash:
            # agent.epsilon is set to give randomness to actions
            agent.epsilon = 80 - counter_games
            # get old state
            state_old = agent.get_state(game, player1, food1)
            # predict action based on the old state
            final_move = agent.predict(state_old)
            # perform new move and get new state
            player1.do_move(final_move, player1.x, player1.y, game, food1, agent)
            state_new = agent.get_state(game, player1, food1)
            # set reward for the new state
            reward = agent.set_reward(player1, game.crash)
            # train short memory based on the new action and state
            agent.train_short_memory(state_old, final_move, reward, state_new, game.crash)
            # store the new data into long-term memory
            agent.remember(state_old, final_move, reward, state_new, game.crash)
            record = get_record(game.score, record)
            if display_option:
                display(player1, food1, game, record)
                pygame.time.wait(speed)
        agent.replay_new(agent.memory)
        counter_games += 1
        print('Game', counter_games, ' Score:', game.score)
        score_plot.append(game.score)
        counter_plot.append(counter_games)
    agent.model.save_weights('weights.hdf5')
    plot_seaborn(counter_plot, score_plot)
def run():
    pygame.init()
    agent = DQNAgent()
    counter_games = 0
    score_plot = []
    counter_plot = []
    record = 0
    while counter_games < 150:
        # Initialize classes
        game = Game(440, 440)
        player1 = game.player
        food1 = game.food
        # Perform first move
        initialize_game(player1, game, food1, agent)
        if display_option:
            display(player1, food1, game, record)
        while not game.crash:
            agent.epsilon = 80 - counter_games
            state_old = agent.get_state(game, player1, food1)
            if randint(0, 200) < agent.epsilon:
                final_move = to_categorical(randint(0, 2), num_classes=3)[0]
            else:
                prediction = agent.model.predict(state_old.reshape((1, 11)))
                final_move = to_categorical(np.argmax(prediction[0]), num_classes=3)[0]
            player1.do_move(final_move, player1.x, player1.y, game, food1, agent)
            state_new = agent.get_state(game, player1, food1)
            reward = agent.set_reward(player1, game.crash)
            agent.train_short_memory(state_old, final_move, reward, state_new, game.crash)
            agent.remember(state_old, final_move, reward, state_new, game.crash)
            record = get_record(game.score, record)
            if display_option:
                display(player1, food1, game, record)
                pygame.time.wait(speed)
        agent.replay_new(agent.memory)
        counter_games += 1
        print('Game', counter_games, ' Score:', game.score)
        score_plot.append(game.score)
        counter_plot.append(counter_games)
    agent.model.save_weights('weights.hdf5')
    plot_seaborn(counter_plot, score_plot)
def main():
    # Initialise all pygame modules and create the window
    # (we get a surface that represents the screen).
    pygame.init()
    agent = DQNAgent()
    counter_games = 0
    record = 0
    while counter_games < 150:
        screen = pygame.display.set_mode((800, 600))
        # Set the window title, do not hide the mouse cursor, and send repeated key presses.
        pygame.display.set_caption("Pygame-Tutorial: Animation")
        pygame.mouse.set_visible(1)
        pygame.key.set_repeat(1, 30)
        pygame.font.init()  # you have to call this at the start
        myfont = pygame.font.SysFont('Comic Sans MS', 30)
        # Create a Clock object, which we need to limit the frame rate.
        clock = pygame.time.Clock()
        # Create a tilemap.
        map = Tilemap.Tilemap()
        event = AutoInput.AutoInput()
        # The loop, and with it our game, runs as long as running == True.
        running = True
        max_steps_reached = False
        max_steps = 100
        step = 0
        max_score = map.player.pos_x
        max_score_evolution = []
        while running and not max_steps_reached:
            agent.epsilon = 80 - counter_games
            # get old state
            state_old = agent.get_state(map)
            map.player.pos_x_old = map.player.pos_x
            # perform random actions based on agent.epsilon, or choose the action
            if randint(0, 200) < agent.epsilon:
                final_move = to_categorical(randint(0, 2), num_classes=3)
            else:
                # predict action based on the old state
                prediction = agent.model.predict(state_old.reshape((1, 7)))
                final_move = to_categorical(np.argmax(prediction[0]), num_classes=3)
            # Limit the frame rate to 30 frames per second.
            # Pygame waits if the program runs faster.
            clock.tick(30)
            # Fill the screen surface with the background colour.
            screen.fill((198, 209, 255))
            map.handle_input(final_move)
            # continue the jump animation if the player is mid-jump
            if map.player.isjump:
                map.player.jump()
            # Render the tilemap onto the screen surface.
            map.render(screen)
            textsurface = myfont.render(
                "Game " + str(counter_games) + " Step " + str(step) + " Max Score " + str(max_score),
                False, (0, 0, 0))
            screen.blit(textsurface, (50, 50))
            # Draw the obstacle (Hindernis) onto the map and check whether a new one is needed
            if not map.isThereHindernis:
                map.createNewHindernis()
                map.isThereHindernis = True
            map.hindernis.move()
            map.hindernis.render(screen)
            map.checkHindernisOnMap()
            state_new = agent.get_state(map)
            crash = map.collisionDetection()
            # set reward for the new state
            reward = agent.set_reward(map.player, crash)
            # train short memory based on the new action and state
            agent.train_short_memory(state_old, final_move, reward, state_new, running)
            # Show the contents of the screen
            pygame.display.flip()
            if map.player.pos_x > max_score:
                max_score = map.player.pos_x
            step += 1
            if step >= max_steps:
                max_steps_reached = True
                max_score_evolution.append(max_score)
            agent.remember(state_old, final_move, reward, state_new, running)
            # record = get_record(map.player.pos_x, record)
            # if display_option:
            #     display(player1, food1, game, record)
            #     pygame.time.wait(speed)
        agent.replay_new(agent.memory)
        counter_games += 1
    agent.model.save_weights('weights.hdf5')
    sns.plot(max_score_evolution)
def run(episodes, load_weights, display_option, speed):
    pygame.init()
    agent = DQNAgent()
    weights_filepath = os.path.join(os.getcwd(), WEIGHTS_FILENAME)
    if load_weights and os.path.isfile(weights_filepath):
        agent.model.load_weights(weights_filepath)
    counter_games = 0
    score_plot = []
    counter_plot = []
    record = 0
    while counter_games < episodes:
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                pygame.quit()
                quit()
        # Initialize classes
        game = Game(440, 440)
        player1 = game.player
        food1 = game.food
        # Perform first move
        initialize_game(player1, game, food1, agent)
        if display_option:
            display(player1, food1, game, record)
        while not game.crash:
            # agent.epsilon is set to give randomness to actions
            agent.epsilon = 80 - counter_games
            # get old state
            state_old = agent.get_state(game, player1, food1)
            # perform random actions based on agent.epsilon, or choose the action
            if randint(0, 200) < agent.epsilon:
                final_move = to_categorical(randint(0, 2), num_classes=3)
            else:
                # predict action based on the old state
                prediction = agent.model.predict(state_old.reshape((1, 11)))
                final_move = to_categorical(np.argmax(prediction[0]), num_classes=3)
            # perform new move and get new state
            player1.do_move(final_move, player1.x, player1.y, game, food1, agent)
            state_new = agent.get_state(game, player1, food1)
            # set reward for the new state
            reward = agent.set_reward(player1, game.crash)
            # train short memory based on the new action and state
            agent.train_short_memory(state_old, final_move, reward, state_new, game.crash)
            # store the new data into long-term memory
            agent.remember(state_old, final_move, reward, state_new, game.crash)
            record = get_record(game.score, record)
            if display_option:
                display(player1, food1, game, record)
                pygame.time.wait(speed)
        agent.replay_new(agent.memory)
        counter_games += 1
        print(f'Game {counter_games} Score: {game.score}')
        score_plot.append(game.score)
        counter_plot.append(counter_games)
    agent.model.save_weights(WEIGHTS_FILENAME)
    plot_seaborn(counter_plot, score_plot)
def run(params):
    """
    Run the DQN algorithm, based on the parameters previously set.
    """
    pygame.init()
    agent = DQNAgent(params)
    agent = agent.to(DEVICE)
    agent.optimizer = optim.Adam(agent.parameters(), weight_decay=0, lr=params['learning_rate'])
    counter_games = 0
    score_plot = []
    counter_plot = []
    record = 0
    total_score = 0
    while counter_games < params['epoch']:
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                pygame.quit()
                quit()
        # Initialize classes
        game = Game(440, 440)
        player1 = game.player
        food1 = game.food
        # First move
        initialize_game(player1, game, food1, agent, params['batch_size'])
        if params['display']:
            display(player1, food1, game, record)
        steps = 0  # moves since the last reward was received
        while (not game.crash) and (steps < 100):
            if not params['train']:
                agent.epsilon = 0.01
            else:
                # agent.epsilon controls the randomness of actions
                agent.epsilon = 1 - (counter_games * params['epsilon_decay_linear'])
            # get the old state
            state_old = agent.get_state(game, player1, food1)
            # perform a random action based on agent.epsilon, or choose the action from the model
            if random.uniform(0, 1) < agent.epsilon:
                final_move = np.eye(3)[randint(0, 2)]
            else:
                # predict the action based on the old state
                with torch.no_grad():
                    state_old_tensor = torch.tensor(state_old.reshape((1, 11)), dtype=torch.float32).to(DEVICE)
                    prediction = agent(state_old_tensor)
                    final_move = np.eye(3)[np.argmax(prediction.detach().cpu().numpy()[0])]
            # perform the new move and get the new state
            player1.do_move(final_move, player1.x, player1.y, game, food1, agent)
            state_new = agent.get_state(game, player1, food1)
            # set the reward for the new move
            reward = agent.set_reward(player1, game.crash)
            # reset the step counter whenever food is eaten
            if reward > 0:
                steps = 0
            if params['train']:
                # train short memory based on the new action and state
                agent.train_short_memory(state_old, final_move, reward, state_new, game.crash)
                # store the new data into long-term memory
                agent.remember(state_old, final_move, reward, state_new, game.crash)
            record = get_record(game.score, record)
            if params['display']:
                display(player1, food1, game, record)
                pygame.time.wait(params['speed'])
            steps += 1
        if params['train']:
            agent.replay_new(agent.memory, params['batch_size'])
        counter_games += 1
        total_score += game.score
        print(f'Game {counter_games} Score: {game.score}')
        score_plot.append(game.score)
        counter_plot.append(counter_games)
    mean, stdev = get_mean_stdev(score_plot)
    if params['train']:
        model_weights = agent.state_dict()
        torch.save(model_weights, params["weights_path"])
    if params['plot_score']:
        plot_seaborn(counter_plot, score_plot, params['train'])
    return total_score, mean, stdev
def run(params):
    """
    Run the DQN algorithm, based on the parameters previously set.
    """
    pygame.init()
    agent = DQNAgent(params)
    agent = agent.to(DEVICE)
    agent.optimizer = optim.Adam(agent.parameters(), weight_decay=0, lr=params['learning_rate'])
    counter_games = 0
    score_plot = []
    counter_plot = []
    record = 0
    total_score = 0
    while counter_games < params['episodes']:
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                pygame.quit()
                quit()
        # Initialize classes
        game = Game(440, 440)
        player1 = game.player
        food1 = game.food
        # Perform first move
        initialize_game(player1, game, food1, agent, params['batch_size'])
        if params['display']:
            display(player1, food1, game, record)
        while not game.crash:
            if not params['train']:
                agent.epsilon = 0.01
            else:
                # agent.epsilon is set to give randomness to actions
                agent.epsilon = 1 - (counter_games * params['epsilon_decay_linear'])
            # get old state
            state_old = agent.get_state(game, player1, food1)
            # perform random actions based on agent.epsilon, or choose the action
            if random.uniform(0, 1) < agent.epsilon:
                final_move = np.eye(3)[randint(0, 2)]
            else:
                # predict action based on the old state
                with torch.no_grad():
                    state_old_tensor = torch.tensor(state_old.reshape((1, 11)), dtype=torch.float32).to(DEVICE)
                    prediction = agent(state_old_tensor)
                    final_move = np.eye(3)[np.argmax(prediction.detach().cpu().numpy()[0])]
            # perform new move and get new state
            player1.do_move(final_move, player1.x, player1.y, game, food1, agent)
            state_new = agent.get_state(game, player1, food1)
            # set reward for the new state
            reward = agent.set_reward(player1, game.crash)
            if params['train']:
                # train short memory based on the new action and state
                agent.train_short_memory(state_old, final_move, reward, state_new, game.crash)
                # store the new data into long-term memory
                agent.remember(state_old, final_move, reward, state_new, game.crash)
            record = get_record(game.score, record)
            if params['display']:
                display(player1, food1, game, record)
                pygame.time.wait(params['speed'])
        if params['train']:
            agent.replay_new(agent.memory, params['batch_size'])
        counter_games += 1
        total_score += game.score
        print(f'Game {counter_games} Score: {game.score}')
        score_plot.append(game.score)
        counter_plot.append(counter_games)
    mean, stdev = get_mean_stdev(score_plot)
    if params['train']:
        model_weights = agent.state_dict()
        torch.save(model_weights, params["weights_path"])
    if params['plot_score']:
        plot_seaborn(counter_plot, score_plot, params['train'])
    return total_score, mean, stdev
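# Note: the two PyTorch variants above persist weights with torch.save(agent.state_dict(), ...).
# A minimal sketch of restoring such a checkpoint for evaluation; it assumes the same DQNAgent,
# DEVICE and params['weights_path'] used above, and the function name is illustrative only.
def load_trained_agent(params):
    agent = DQNAgent(params)
    # Load the saved state dict onto the target device and switch to inference mode.
    agent.load_state_dict(torch.load(params['weights_path'], map_location=DEVICE))
    agent = agent.to(DEVICE)
    agent.eval()
    return agent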
def gameLoop():
    game_over = False
    games = 0
    games_to_play = 300
    highscore = 0
    score_plot = []
    games_plot = []
    agent = DQNAgent()
    while games < games_to_play:  # Play a total of x games
        xpos = dis_width / 2   # X spawn point coordinate
        ypos = dis_height / 2  # Y spawn point coordinate
        xdir = 0
        ydir = 0
        snake_List = []
        length_of_snake = 1
        food = random_food(snake_List)
        xfood = food[0]
        yfood = food[1]
        while not game_over:
            agent.epsilon = 80 - games
            # agent.epsilon = 0
            state_old = agent.get_state(xpos, ypos, xdir, ydir, snake_block, xfood, yfood, dis_width, dis_height, snake_List)
            if randint(0, 200) < agent.epsilon:
                final_move = to_categorical(randint(0, 2), num_classes=3)
            else:
                prediction = agent.model.predict(state_old.reshape((1, 11)))
                final_move = to_categorical(np.argmax(prediction[0]), num_classes=3)
            # Do Action -------------------------------------------------------------
            if np.array_equal(final_move, [1, 0, 0]):
                pass
            elif np.array_equal(final_move, [0, 1, 0]) and ydir == 0:  # right - going horizontal
                ydir = snake_block
                xdir = 0
            elif np.array_equal(final_move, [0, 1, 0]) and xdir == 0:  # right - going vertical
                xdir = snake_block
                ydir = 0
            elif np.array_equal(final_move, [0, 0, 1]) and ydir == 0:  # left - going horizontal
                ydir = -snake_block
                xdir = 0
            elif np.array_equal(final_move, [0, 0, 1]) and xdir == 0:  # left - going vertical
                xdir = -snake_block
                ydir = 0
            # Did Action ------------------------------------------------------------
            # Update frame after action ----------------------------------------------
            eaten = False
            # If it collides with the border
            if xpos == dis_width - snake_block and xdir > 0 or xpos == 0 and xdir < 0 or ypos == dis_height - snake_block and ydir > 0 or ypos == 0 and ydir < 0:
                game_over = True
            xpos += xdir
            ypos += ydir
            dis.fill(blue)
            pygame.draw.rect(dis, green, [xfood, yfood, snake_block, snake_block])
            snake_Head = [xpos, ypos]
            snake_List.append(snake_Head)
            if len(snake_List) > length_of_snake:
                del snake_List[0]
            for x in snake_List[:-1]:
                if x == snake_Head:
                    game_over = True
            draw_snake(snake_block, snake_List)
            highscore = get_highscore(highscore, length_of_snake - 1)
            your_score(highscore, length_of_snake - 1)
            if xpos == xfood and ypos == yfood:
                eaten = True
                food = random_food(snake_List)
                xfood = food[0]
                yfood = food[1]
                length_of_snake += 1
            # Updated frame after action ---------------------------------------------
            state_new = agent.get_state(xpos, ypos, xdir, ydir, snake_block, xfood, yfood, dis_width, dis_height, snake_List)
            reward = agent.set_reward(game_over, eaten)
            agent.train_short_memory(state_old, final_move, reward, state_new, game_over)
            agent.remember(state_old, final_move, reward, state_new, game_over)
            pygame.display.update()
            clock.tick(snake_speed)
        print("Game:", games + 1, "Score:", length_of_snake - 1, "Highscore:", highscore)
        agent.replay_new(agent.memory)
        games += 1
        score_plot.append(length_of_snake - 1)
        games_plot.append(games)
        game_over = False
    agent.model.save_weights('weights10x10V3.hdf5')
    pygame.quit()
    # Plot stats of the games:
    sns.set(color_codes=True)
    ax = sns.regplot(x=games_plot, y=score_plot)
    ax.set(xlabel='games', ylabel='score')
    plt.show()
    quit()
def run():
    counter_games = 0
    score_plot = []
    counter_plot = []
    record = 0
    agent = DQNAgent(TEST_MODE)
    if TEST_MODE:
        pygame.init()
        pygame.font.init()
        print('Start testing the trained model ...')
    else:
        print('Start training ...')
    while counter_games < 150:
        # Initialize classes
        game = Game(440, 440)
        player1 = game.player
        food1 = game.food
        # Perform first move
        initialize_game(player1, game, food1, agent)
        if TEST_MODE:
            display(player1, food1, game, record)
        while not game.crash:
            # agent.epsilon is set to give randomness to actions
            agent.epsilon = 80 - counter_games
            # get old state
            state_old = agent.get_state(game, player1, food1)
            # perform random actions based on agent.epsilon, or choose the action
            if randint(0, 200) < agent.epsilon and not TEST_MODE:
                final_move = to_categorical(randint(0, 2), num_classes=3)
            else:
                # predict action based on the old state
                prediction = agent.model.predict(state_old)
                final_move = to_categorical(np.argmax(prediction[0]), num_classes=3)
            # perform new move and get new state
            player1.do_move(final_move, player1.x, player1.y, game, food1, agent)
            state_new = agent.get_state(game, player1, food1)
            # set reward for the new state
            reward = agent.set_reward(player1, game.crash)
            # train short memory based on the new action and state
            agent.train_short_memory(state_old, final_move, reward, state_new, game.crash)
            # store the new data into long-term memory
            agent.remember(state_old, final_move, reward, state_new, game.crash)
            record = get_record(game.score, record)
            if TEST_MODE:
                display(player1, food1, game, record)
                pygame.time.wait(speed)
        agent.replay_new(agent.memory)
        counter_games += 1
        print('Game', counter_games, ' Score:', game.score)
        score_plot.append(game.score)
        counter_plot.append(counter_games)
    agent.model.save_weights('weights.hdf5')
    plot_seaborn(counter_plot, score_plot)
def main():
    # Initialise the network
    agent = DQNAgent()
    taille = 30  # Number of cells = taille * taille
    game = Game(taille)  # Initialise the game
    init_ihm(game)  # Initialise the UI
    max_iteration, cpt_iteration = 200, 0  # Number of games to play and game counter
    rep_lines_bestScore, score_plot, counter_plot = [], [], []  # best-score lines, scores, game numbers
    while (cpt_iteration < max_iteration):
        game = Game(taille)  # Initialise the game
        game.calculer_lignes_jouables()  # Compute the lines that can currently be played
        if len(game.rep_playable_lines) > 1:  # pick one of the playable lines at random
            game.choosed_line = game.rep_playable_lines[random.randint(0, len(game.rep_playable_lines) - 1)]
        elif len(game.rep_playable_lines) != 0:
            game.choosed_line = game.rep_playable_lines[0]
        while not game.crash and game.cpt_liberte < game.liberte:  # as long as there are still playable lines
            agent.epsilon = 120 - cpt_iteration
            state_old = agent.get_state(game)  # get old state
            # perform random actions based on agent.epsilon, or choose the action
            if randint(0, 200) < agent.epsilon:
                final_move = to_categorical(randint(0, 4), num_classes=8)
            else:
                # predict action based on the old state
                prediction = agent.model.predict(state_old.reshape((1, 80)))
                final_move = to_categorical(np.argmax(prediction[0]), num_classes=8)
            game.move = final_move
            vect_move = []  # list of possible outputs
            vect_squar = [[-1, -1], [-1, 0], [-1, 1], [0, 1], [1, 1], [1, 0], [1, -1], [0, -1]]
            for i in range(len(vect_squar)):
                vect_move.append([0, 0, 0, 0, 0, 0, 0, 0])  # initialise the outputs
            for i in range(len(vect_squar)):
                vect_move[i][i] = 1  # 8x8 identity matrix
            for i in range(len(vect_move)):
                if vect_move[i] == final_move.tolist():  # which output final_move corresponds to
                    # move the player according to the output indicated by final_move
                    game.x_player = int(game.x_player + vect_squar[i][0])
                    game.y_player = int(game.y_player + vect_squar[i][1])
                    game.move = vect_move[i]  # save the move
            game.found = False
            for i in range(len(game.rep_playable_lines) - 1):  # if a playable line starts at the player's position
                if len(game.rep_playable_lines) != 0 and game.rep_playable_lines[i][0][0].x == game.x_player and game.rep_playable_lines[i][0][0].y == game.y_player:
                    game.found = True
                    game.jouer_ligne(game.rep_playable_lines[i])  # play the line; tmp_line = [[cellule * 5], direction]
                    game.rep_lines.append(game.rep_playable_lines[i])  # add the line to the list of played lines
                    game.calculer_lignes_jouables()  # recompute the playable lines
                    if cpt_iteration == 0 or len(game.rep_lines) > max(score_plot):
                        rep_lines_bestScore = game.rep_lines  # save the list of lines of the best score
                    state_new = agent.get_state(game)
                    reward = agent.set_reward(len(game.rep_playable_lines), game.found)
                    # train short memory based on the new action and state
                    agent.train_short_memory(state_old, final_move, reward, state_new, game.crash)
                    # store the new data into long-term memory
                    agent.remember(state_old, final_move, reward, state_new, game.crash)
                    game.cpt_liberte = 0
                    break
            if len(game.rep_playable_lines) == 0:  # if no playable lines remain
                game.crash = True
                state_new = agent.get_state(game)
                reward = agent.set_reward(0, game.found)
                # train short memory based on the new action and state
                agent.train_short_memory(state_old, final_move, reward, state_new, game.crash)
                # store the new data into long-term memory
                agent.remember(state_old, final_move, reward, state_new, game.crash)
            elif game.cpt_liberte > game.liberte - 2 and not game.found:
                game.x_player = 13  # return to the starting cell
                game.y_player = 13
                game.crash = True
                game.found = False
                state_new = agent.get_state(game)
                reward = agent.set_reward(0, game.found)
                # train short memory based on the new action and state
                agent.train_short_memory(state_old, final_move, reward, state_new, game.crash)
                # store the new data into long-term memory
                agent.remember(state_old, final_move, reward, state_new, game.crash)
            elif game.found == False:
                game.cpt_liberte += 1
        if cpt_iteration == 0 or len(game.rep_lines) > max(score_plot):  # if this is the best score reached
            rep_lines_bestScore = game.rep_lines  # save the list of lines of the best score
        score_plot.append(len(game.rep_lines))  # add the score to the list of scores
        cpt_iteration += 1  # increment the game counter
        print(cpt_iteration, "-Score: ", len(game.rep_lines), " - Best score: ", max(score_plot))
        counter_plot.append(cpt_iteration)  # add the game number
        agent.replay_new(agent.memory)
    afficher_lignes_ihm(rep_lines_bestScore, max_iteration, max(score_plot))  # show the best-score lines in the UI
    plot_seaborn(counter_plot, score_plot)  # plot the scores per game
    mainloop()
def run(params):
    pygame.init()
    agent = DQNAgent(params)
    print_info(params)
    weights_filepath = params['weights_path']
    if params['load_weights']:
        agent.model.load_weights(weights_filepath)
        print("Weights Loaded")
    else:
        print("Training From Scratch...")
    counter_games = 0
    score_plot = []
    counter_plot = []
    record = 0
    while counter_games < params['episodes']:
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                pygame.quit()
                quit()
        # Class objects
        game = Game(params)
        player1 = game.player
        food1 = game.food
        total_reward = -150
        total_reward2 = 0
        # First move
        initialize_game(player1, game, food1, agent, params['batch_size'], counter_games)
        if params['display']:
            display(player1, food1, game, record, counter_games, total_reward, params)
        while not game.crash:
            if not params['train']:
                agent.epsilon = 0
            else:
                # agent.epsilon is set to give randomness to actions
                agent.epsilon = 1 - (counter_games * params['epsilon_decay_linear'])
            # old state
            state_old = agent.get_state(game, player1, food1, params)
            # random action or model prediction
            if randint(0, 1) < agent.epsilon and not params['load_weights']:
                final_move = to_categorical(randint(0, 2), num_classes=3)
            else:
                prediction = agent.model.predict(state_old.reshape((1, 20)))
                final_move = to_categorical(np.argmax(prediction[0]), num_classes=3)
            player1.do_move(final_move, player1.x, player1.y, game, food1, agent)
            state_new = agent.get_state(game, player1, food1, params)
            reward = agent.set_reward(player1, game.crash, food1, counter_games, final_move)
            total_reward += reward
            total_reward2 += reward
            total_reward = round(total_reward, 2)
            total_reward2 = round(total_reward2, 2)
            if params['train']:
                # train short memory based on the new action and state
                agent.train_short_memory(state_old, final_move, reward, state_new, game.crash)
                # store the new data into long-term memory
                agent.remember(state_old, final_move, reward, state_new, game.crash)
            record = get_record(game.score, record)
            if params['display']:
                display(player1, food1, game, record, counter_games, total_reward, params)
        if params['train']:
            agent.replay_new(agent.memory, params['batch_size'])
        counter_games += 1
        print(f'Game {counter_games} Score: {game.score} Reward: {total_reward2}')
        score_plot.append(game.score)
        counter_plot.append(counter_games)
        if counter_games % params['cf'] == 0:
            n = int(counter_games / params['cf'])
            agent.model.save_weights(params['cp'] + str(n) + ".hdf5")
            print("Checkpoint Saved...")
    plot_seaborn(counter_plot, score_plot)
    pygame.quit()
    quit()
def main():
    # Initialise the network
    agent = DQNAgent()
    taille = 30  # Number of cells = taille * taille
    game = Game(taille)  # Initialise the game
    init_ihm(game)  # Initialise the UI
    max_iteration, cpt_iteration = 2, 0  # Number of games to play and game counter
    rep_lines_bestScore, score_plot, counter_plot = [], [], []  # best-score lines, scores, game numbers
    while (cpt_iteration < max_iteration):
        game = Game(taille)  # Initialise the game
        game.calculer_lignes_jouables()  # Compute the lines that can currently be played
        while not game.crash:  # as long as there are still playable lines
            agent.epsilon = 80 - cpt_iteration
            state_old = agent.get_state(game)  # get old state
            # perform random actions based on agent.epsilon, or choose the action
            if randint(0, 200) < agent.epsilon:
                final_move = to_categorical(randint(0, 2), num_classes=3)
            else:
                # predict action based on the old state
                prediction = agent.model.predict(state_old.reshape((1, 13)))
                final_move = to_categorical(np.argmax(prediction[0]), num_classes=3)
            if np.array_equal(final_move, [1, 0, 0]):
                tmp_line = game.rep_playable_lines[random.randint(0, int((len(game.rep_playable_lines) - 1) / 3))]
            elif np.array_equal(final_move, [0, 1, 0]):
                tmp_line = game.rep_playable_lines[random.randint(int((len(game.rep_playable_lines) - 1) / 3), int((len(game.rep_playable_lines) - 1) * 2 / 3))]
            elif np.array_equal(final_move, [0, 0, 1]):
                tmp_line = game.rep_playable_lines[random.randint(int((len(game.rep_playable_lines) - 1) * 2 / 3), int(len(game.rep_playable_lines) - 1))]
            game.jouer_ligne(tmp_line)  # play the line; tmp_line = [[cellule * 5], direction]
            game.rep_lines.append(tmp_line)  # add the line to the list of played lines
            game.calculer_lignes_jouables()  # recompute the playable lines
            best = 0
            if cpt_iteration == 0 or len(game.rep_lines) > max(score_plot):
                rep_lines_bestScore = game.rep_lines  # save the list of lines of the best score
                best = 1
            if (len(game.rep_playable_lines) == 0):  # if no playable lines remain
                game.crash = True
            else:
                state_new = agent.get_state(game)
            reward = agent.set_reward(game.crash, best)
            # train short memory based on the new action and state
            agent.train_short_memory(state_old, final_move, reward, state_new, game.crash)
            # store the new data into long-term memory
            agent.remember(state_old, final_move, reward, state_new, game.crash)
        if cpt_iteration == 0 or len(game.rep_lines) > max(score_plot):  # if this is the best score reached
            rep_lines_bestScore = game.rep_lines  # save the list of lines of the best score
        score_plot.append(len(game.rep_lines))  # add the score to the list of scores
        cpt_iteration += 1
        print(cpt_iteration, "-Score: ", len(game.rep_lines), " - Best score: ", max(score_plot))
        counter_plot.append(cpt_iteration)  # add the game number
        agent.replay_new(agent.memory)
    agent.model.save_weights('weights.hdf5')
    afficher_lignes_ihm(rep_lines_bestScore, max_iteration, max(score_plot))
    plot_seaborn(counter_plot, score_plot)
    mainloop()
def run_game():
    FPS = 60
    # Initialize game, settings and create a screen object.
    pygame.init()
    fps_clock = pygame.time.Clock()
    ai_settings = Settings()
    # FOR THE DQN #
    agent = DQNAgent()
    counter_games = 0
    score_plot = []
    counter_plot = []
    record = 0
    # FOR THE DQN #
    while counter_games < 150:
        # Create statistics.
        stats = GameStats(ai_settings)
        # Create game items.
        game_items = GameItems(ai_settings, stats)
        # Create a fleet of aliens.
        gf.create_fleet(ai_settings, game_items)
        played = False
        gf.start_new_game(ai_settings, stats, game_items)
        # Start the main loop for the game.
        while stats.game_active:
            stats.time_passed = fps_clock.tick(FPS) / 1000  # Time in seconds since previous loop.
            gf.check_events(ai_settings, stats, game_items)
            if stats.game_active:
                game_items.ship.update(stats)
                gf.update_bullets(ai_settings, stats, game_items)
                gf.update_aliens(ai_settings, stats, game_items)
                # FOR THE DQN #
                agent.epsilon = 80 - counter_games
                state_old = gf.get_state(ai_settings, stats, game_items)
                if randint(0, 200) < agent.epsilon:
                    final_move = to_categorical(randint(0, 3), num_classes=4)
                else:
                    # predict action based on the old state
                    prediction = agent.model.predict(state_old.reshape((1, 3536)))
                    final_move = to_categorical(np.argmax(prediction[0]), num_classes=4)
                # FOR THE DQN #
                # DQN #
                # perform new move and get new state
                gf.do_move(final_move, ai_settings, stats, game_items)
                state_new = gf.get_state(ai_settings, stats, game_items)
                # set reward for the new state
                reward = agent.set_reward(stats.score, stats.ships_left)
                # train short memory based on the new action and state
                agent.train_short_memory(state_old, final_move, reward, state_new, stats.game_active)
                # store the new data into long-term memory
                # TO:DO agent.remember(state_old, final_move, reward, state_new, game.crash)
                # Get value of played game
                # TO:DO record = get_record(game.score, record)
                # DQN #
                played = True
            elif played:
                user = ask(game_items.screen)
                if len(user) > 0:
                    coll = connect_and_collect()
                    add_score(user, stats.score, coll)
                played = False
            # gf.update_screen(ai_settings, stats, game_items)
        # FOR THE DQN #
        agent.replay_new(agent.memory)
        counter_games += 1
        print('Game', counter_games, ' Score:', stats.score)
        score_plot.append(stats.score)
        counter_plot.append(counter_games)
    agent.model.save_weights('weights.hdf5')
    plot_seaborn(counter_plot, score_plot)
def run(display_option, speed, params):
    if display_option:
        pygame.init()
    agent = DQNAgent(params)
    counter_games = 0
    score_plot = []
    counter_plot = []
    record = 0
    while counter_games < params['episodes']:
        # Initialize classes
        game = Game(440, 440, display_option)
        player1 = game.player
        food1 = game.food
        # Perform first move
        initialize_game(player1, game, food1, agent, params['batch_size'])
        if display_option:
            display(player1, food1, game, record)
        step_count = 0
        raw_data = [] if params['raw_output'] is not None else None
        while not game.crash:
            if display_option:
                for event in pygame.event.get():
                    if event.type == pygame.QUIT:
                        pygame.quit()
                        quit()
            if not params['train']:
                agent.epsilon = 0
            else:
                # agent.epsilon is set to give randomness to actions
                agent.epsilon = 1 - (counter_games * params['epsilon_decay_linear'])
            # get old state
            state_old = agent.get_state(game, player1, food1)
            # perform random actions based on agent.epsilon, or choose the action
            if random() < agent.epsilon or random() < (step_count - 300) / 300:
                prediction = [0, 0, 0]
                final_move = to_categorical(randint(0, 2), num_classes=3)
            else:
                # predict action based on the old state
                prediction = agent.model.predict(state_old.reshape((1, 11)))[0]
                final_move = to_categorical(np.argmax(prediction), num_classes=3)
            if raw_data is not None:
                step_data = {
                    'head_x': player1.x,
                    'head_y': player1.y,
                    'food_x': food1.x_food,
                    'food_y': food1.y_food,
                    'snake_position': copy.deepcopy(player1.position),
                    'snake_x_change': player1.x_change,
                    'snake_y_change': player1.y_change,
                    'action': final_move.tolist()
                }
            # perform new move and get new state
            player1.do_move(final_move, player1.x, player1.y, game, food1, agent)
            game.player.action = final_move
            state_new = agent.get_state(game, player1, food1)
            # set reward for the new state
            reward = agent.set_reward(player1, game.crash)
            if raw_data is not None:
                step_data['eaten'] = player1.eaten
                step_data['reward'] = reward
                step_data['crash'] = game.crash
                step_data['next_state'] = {
                    'head_x': player1.x,
                    'head_y': player1.y,
                    'food_x': food1.x_food,
                    'food_y': food1.y_food,
                    'snake_position': copy.deepcopy(player1.position),
                    'snake_x_change': player1.x_change,
                    'snake_y_change': player1.y_change,
                }
                raw_data.append(step_data)
            if params['verbose']:
                print(prediction, final_move, reward, step_count)
            if params['train']:
                # train short memory based on the new action and state
                agent.train_short_memory(state_old, final_move, reward, state_new, game.crash)
                # store the new data into long-term memory
                agent.remember(state_old, final_move, reward, state_new, game.crash)
            record = get_record(game.score, record)
            if display_option:
                display(player1, food1, game, record)
                pygame.time.wait(speed)
            step_count += 1
        if params['train']:
            agent.replay_new(agent.memory, params['batch_size'])
        counter_games += 1
        print(f'Game {counter_games}/{step_count}/{agent.epsilon} Score: {game.score}')
        if raw_data is not None:
            fn_index = params['raw_output_index'] + counter_games
            with open(os.path.join(params['raw_output'], f'{fn_index}.json'), 'w') as out:
                json.dump(raw_data, out)
        score_plot.append(game.score)
        counter_plot.append(counter_games)
    if params['train']:
        agent.model.save_weights(params['weights_path'])
    plot_seaborn(counter_plot, score_plot)
class Simulation():
    def __init__(self):
        self.amountOfSimulations = 0
        self.maxSpeed = 4
        self.score = 0
        self.previousScore = 0
        self.highscore = 0
        self.highscoreTime = 0
        # self.delayCounter = 0
        # self.delay = randint(5, 10)
        self.keepRunning = True
        self.agent = DQNAgent()
        self.mysystem = chrono.ChSystemNSC()
        self.ground = theBattleground.theBattleground(self.mysystem)
        self.createRobot(self.mysystem)
        self.createApplication()
        self.run()

    def createApplication(self):
        # Create an Irrlicht application to visualize the system
        self.myapplication = chronoirr.ChIrrApp(self.mysystem, 'PyChrono example', chronoirr.dimension2du(1024, 768))
        self.myapplication.AddTypicalSky()
        self.myapplication.AddTypicalLogo()
        self.myapplication.AddTypicalCamera(chronoirr.vector3df(0.6, 0.6, 0.8))
        self.myapplication.AddLightWithShadow(chronoirr.vector3df(2, 4, 2),  # point
                                              chronoirr.vector3df(0, 0, 0),  # aim point
                                              9,                             # radius (power)
                                              1, 9,                          # near, far
                                              30)                            # angle of FOV
        # ==IMPORTANT!== Use this function for adding a ChIrrNodeAsset to all items
        # in the system. These ChIrrNodeAsset assets are 'proxies' to the Irrlicht meshes.
        # If you need a finer control on which item really needs a visualization proxy in
        # Irrlicht, just use application.AssetBind(myitem); on a per-item basis.
        self.myapplication.AssetBindAll()
        # ==IMPORTANT!== Use this function for 'converting' into Irrlicht meshes the assets
        # that you added to the bodies into 3D shapes, so they can be visualized by Irrlicht!
        self.myapplication.AssetUpdateAll()
        self.myapplication.AddShadowAll()
        self.myapplication.SetShowInfos(True)

    def displayScore(self):
        print("Score: " + str(self.score) + " Highscore: " + str(self.highscore))

    def checkIfDead(self):
        if (((self.robot.mbody1.GetRot()).Q_to_Rotv()).z < 2.5):
            print("tilt forward - DEAD")
            self.keepRunning = False
        if (((self.robot.mbody1.GetRot()).Q_to_Rotv()).z > 4):
            print("tilt backward - DEAD")
            self.keepRunning = False

    def doMove(self, prediction):
        self.checkIfDead()
        print("pred is: ")
        print(prediction)
        print(" ")
        if prediction[0][0] > prediction[0][1]:
            speed = (prediction[0][0] * self.maxSpeed)
            self.robot.motor_R.SetMotorFunction(chrono.ChFunction_Const(speed))
            self.robot.motor_L.SetMotorFunction(chrono.ChFunction_Const(speed))
        elif prediction[0][1] > prediction[0][0]:
            speed = -(prediction[0][1] * self.maxSpeed)
            self.robot.motor_R.SetMotorFunction(chrono.ChFunction_Const(speed))
            self.robot.motor_L.SetMotorFunction(chrono.ChFunction_Const(speed))
        else:
            self.robot.motor_R.SetMotorFunction(chrono.ChFunction_Const(0))
            self.robot.motor_L.SetMotorFunction(chrono.ChFunction_Const(0))

    def createRobot(self, system):
        try:
            del self.robot
        except:
            pass
        self.robot = theRobot.theRobot(system)

    def restart(self):
        print("restart called")
        del self.myapplication
        del self.mysystem
        self.mysystem = chrono.ChSystemNSC()
        self.ground = theBattleground.theBattleground(self.mysystem)
        self.createRobot(self.mysystem)
        self.createApplication()
        self.mysystem.SetChTime(0)
        self.keepRunning = True
        if self.score > self.highscore:
            self.highscore = self.score
            self.highscoreTime = self.amountOfSimulations
        self.previousScore = self.score
        self.score = 0

    def run(self):
        self.myapplication.SetTimestep(0.1)
        self.myapplication.SetTryRealtime(False)
        while self.amountOfSimulations <= 500:
            self.restart()
            while (self.myapplication.GetDevice().run() and self.keepRunning):
                self.agent.epsilon = 80 - self.amountOfSimulations
                # get old state
                state_old = self.agent.get_state(self.robot)
                self.myapplication.BeginScene()
                self.myapplication.DrawAll()
                self.myapplication.DoStep()
                self.myapplication.EndScene()
                self.checkIfDead()
                # perform random actions based on agent.epsilon, or choose the action
                if randint(0, 200) < self.agent.epsilon:
                    # prediction = to_categorical(random.random(), num_classes=2)
                    prediction = [[random.random(), random.random()]]
                    print("random action")
                else:
                    # predict action based on the old state
                    print("AI action")
                    prediction = self.agent.model.predict(state_old.reshape((1, 2)))
                    # final_move = to_categorical(np.argmax(prediction[0]), num_classes=2)
                # perform new move and get new state
                self.doMove(prediction)
                state_new = self.agent.get_state(self.robot)
                # self.delay = randint(5, 10)
                self.delayCounter = 0
                self.displayScore()
                self.score += 1
                # self.delayCounter += 1
                # set reward for the new state
                reward = self.agent.set_reward(self.score, self.highscore, self.previousScore, self.amountOfSimulations, self.highscoreTime)
                # train short memory based on the new action and state
                self.agent.train_short_memory(state_old, prediction, reward, state_new, self.keepRunning)
                # store the new data into long-term memory
                self.agent.remember(state_old, prediction, reward, state_new, self.keepRunning)
            self.amountOfSimulations += 1
            self.agent.replay_new(self.agent.memory)
        self.agent.model.save_weights('weights.hdf5')
def run():
    # Workaround for the current incompatibility between CUDA and TF -> force CPU use in code
    import os
    import tensorflow as tf
    # os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
    os.environ['CUDA_VISIBLE_DEVICES'] = ''
    if tf.test.gpu_device_name():
        print('[DEBUG] GPU found')
    else:
        print("[DEBUG] No GPU found")
    # end of workaround
    pygame.init()
    agent = DQNAgent()
    counter_games = 0
    score_plot = []
    counter_plot = []
    record = 0
    # while counter_games < 150:
    while counter_games < 100:
        # Initialize classes
        game = Game(440, 440)
        player1 = game.player
        food1 = game.food
        # Perform first move
        initialize_game(player1, game, food1, agent)
        if display_option:
            display(player1, food1, game, record)
        while not game.crash:
            # agent.epsilon is set to give randomness to actions
            agent.epsilon = 80 - counter_games
            # get old state
            state_old = agent.get_state(game, player1, food1)
            # perform random actions based on agent.epsilon, or choose the action
            if randint(0, 200) < agent.epsilon:
                final_move = to_categorical(randint(0, 2), num_classes=3)
                print(f"final_move: {final_move}")
                # from here: random responses for starting the learning
                final_response = {"type": "endRound"}
                # to here: random responses for starting the learning
            else:
                # predict action based on the old state
                prediction = agent.model.predict(state_old.reshape((1, 11)))
                final_move = to_categorical(np.argmax(prediction[0]), num_classes=3)
            # perform new move and get new state
            player1.do_move(final_move, player1.x, player1.y, game, food1, agent)
            state_new = agent.get_state(game, player1, food1)
            # set reward for the new state
            reward = agent.set_reward(player1, game.crash)
            # train short memory based on the new action and state
            agent.train_short_memory(state_old, final_move, reward, state_new, game.crash)
            # store the new data into long-term memory
            agent.remember(state_old, final_move, reward, state_new, game.crash)
            record = get_record(game.score, record)
            if display_option:
                display(player1, food1, game, record)
                pygame.time.wait(speed)
        agent.replay_new(agent.memory)
        counter_games += 1
        print('Game', counter_games, ' Score:', game.score)
        score_plot.append(game.score)
        counter_plot.append(counter_games)
    agent.model.save_weights('weights.hdf5')
    plot_seaborn(counter_plot, score_plot)
food = Food(game)
init(agent, game, player, food)
while game.game_over == False:
    # for random moves
    agent.epsilon = epsilon - num_games
    old_state = agent.get_state(game, player, food)
    if random.randint(0, 200) < agent.epsilon:
        new_direction = to_categorical(random.randint(0, 4), num_classes=5)
    else:
        predict = agent.model.predict(old_state.reshape((1, 13)))
        new_direction = to_categorical(np.argmax(predict[0]), num_classes=5)
    # perform move
    event_handler(player, np.argmax(np.array(new_direction)))
    update_screen()
    new_state = agent.get_state(game, player, food)
    reward = agent.set_reward(game)
    agent.train_model(old_state, new_direction, reward, new_state, game.game_over)
    agent.memoize(old_state, new_direction, reward, new_state, game.game_over)
    clock.tick(game_speed)
agent.replay_new(agent.memory)
print('Game', num_games, ' Score:', game.score)
agent.model.save_weights('weights.hdf5')
def main():
    # Initialise the network
    pygame.init()
    agent = DQNAgent()
    score_plot = []
    counter_plot = []
    record = 0
    taille = 30  # Number of cells = taille * taille
    init_ihm()  # Initialise the UI
    max_iteration = 10  # Number of games
    cpt_iteration = 0  # Game counter
    best_score = 0  # Best score
    rep_lines_bestScore = []  # List of the lines of the best score
    while (cpt_iteration < max_iteration):
        game = Game(taille)  # Initialise the game
        nbr_lignes_crees = 0  # Number of lines created
        game.rep_lines.clear()  # Reset the list of created lines
        game.calculer_lignes_jouables()  # Compute the lines that can currently be played
        while not game.crash:  # as long as there are still playable lines
            agent.epsilon = 80 - cpt_iteration
            state_old = agent.get_state(game)  # get old state
            # perform random actions based on agent.epsilon, or choose the action
            if randint(0, 200) < agent.epsilon:
                final_move = to_categorical(randint(0, 2), num_classes=3)
            else:
                # predict action based on the old state
                prediction = agent.model.predict(state_old.reshape((1, 13)))
                final_move = to_categorical(np.argmax(prediction[0]), num_classes=3)
            if np.array_equal(final_move, [1, 0, 0]):
                tmp_line = game.rep_playable_lines[random.randint(0, int((len(game.rep_playable_lines) - 1) / 3))]
            elif np.array_equal(final_move, [0, 1, 0]):
                tmp_line = game.rep_playable_lines[random.randint(int((len(game.rep_playable_lines) - 1) / 3), int((len(game.rep_playable_lines) - 1) * 2 / 3))]
            elif np.array_equal(final_move, [0, 0, 1]):
                tmp_line = game.rep_playable_lines[random.randint(int((len(game.rep_playable_lines) - 1) * 2 / 3), int(len(game.rep_playable_lines) - 1))]
            game.jouer_ligne(tmp_line)  # play the line; tmp_line = [[cellule * 5], direction]
            game.rep_lines.append(tmp_line)  # add the line to the list of played lines
            game.calculer_lignes_jouables()  # recompute the playable lines
            best = 0
            if (len(game.rep_lines) > best_score):
                best_score = len(game.rep_lines)  # update the best score
                # save the list of lines of the best score
                rep_lines_bestScore = game.rep_lines
                best = 1
            if (len(game.rep_playable_lines) == 0):
                game.crash = True
            else:
                state_new = agent.get_state(game)
            reward = agent.set_reward(game.crash, best)
            # train short memory based on the new action and state
            agent.train_short_memory(state_old, final_move, reward, state_new, game.crash)
            # store the new data into long-term memory
            agent.remember(state_old, final_move, reward, state_new, game.crash)
        print("Score: ", len(game.rep_lines), " - Best score: ", best_score)
        score_plot.append(len(game.rep_lines))
        counter_plot.append(cpt_iteration)
        cpt_iteration += 1
        agent.replay_new(agent.memory)
    score_ihm(max_iteration, best_score)
    agent.model.save_weights('weights.hdf5')
    plot_seaborn(counter_plot, score_plot)
    afficher_lignes_ihm(rep_lines_bestScore)
def run(display_option, speed, params):
    pygame.init()
    agent = DQNAgent(params)
    weights_filepath = params['weights_path']
    if params['load_weights']:
        agent.model.load_weights(weights_filepath)
        print("weights loaded")
    counter_games = 0
    score_plot = []
    counter_plot = []
    record = 0
    while counter_games < params['episodes']:
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                pygame.quit()
                quit()
        # Initialize classes
        game = Game(440, 440)
        player1 = game.player
        food1 = game.food
        # Perform first move
        initialize_game(player1, game, food1, agent, params['batch_size'])
        if display_option:
            display(player1, food1, game, record)
        while not game.crash:
            if not params['train']:
                agent.epsilon = 0
            else:
                # agent.epsilon is set to give randomness to actions
                agent.epsilon = 1 - (counter_games * params['epsilon_decay_linear'])
            # get old state
            state_old = agent.get_state(game, player1, food1)
            # perform random actions based on agent.epsilon, or choose the action
            if randint(0, 1) < agent.epsilon:
                final_move = to_categorical(randint(0, 2), num_classes=3)
            else:
                # predict action based on the old state
                prediction = agent.model.predict(state_old.reshape((1, 11)))
                final_move = to_categorical(np.argmax(prediction[0]), num_classes=3)
            # perform new move and get new state
            player1.do_move(final_move, player1.x, player1.y, game, food1, agent)
            state_new = agent.get_state(game, player1, food1)
            # set reward for the new state
            reward = agent.set_reward(player1, game.crash)
            if params['train']:
                # train short memory based on the new action and state
                agent.train_short_memory(state_old, final_move, reward, state_new, game.crash)
                # store the new data into long-term memory
                agent.remember(state_old, final_move, reward, state_new, game.crash)
            record = get_record(game.score, record)
            if display_option:
                display(player1, food1, game, record)
                pygame.time.wait(speed)
        if params['train']:
            agent.replay_new(agent.memory, params['batch_size'])
        counter_games += 1
        print(f'Game {counter_games} Score: {game.score}')
        score_plot.append(game.score)
        counter_plot.append(counter_games)
    if params['train']:
        agent.model.save_weights(params['weights_path'])
    plot_seaborn(counter_plot, score_plot)
def run(display_option, speed, params):
    pygame.init()
    agent1 = DQNAgent(params, 1)
    agent2 = DQNAgent(params, 2)
    weights_filepath1 = params['weights_path1']
    weights_filepath2 = params['weights_path2']
    counter_games = 0
    record1 = 0
    record2 = 0
    while counter_games < params['episodes']:
        if params['load_weights']:
            agent1.model.load_weights(weights_filepath1)
            agent2.model.load_weights(weights_filepath2)
            print("weights loaded")
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                pygame.quit()
                quit()
        # Initialize classes
        game = Game(900, 600)
        player1 = Player(game, 300, 300)
        player2 = Player(game, 600, 300)
        # Perform first move
        initialize_game(player1, player2, game, agent1, agent2, params['batch_size'])
        if display_option:
            display(player1, player2, game, record1, record2)
        while not game.crash:
            if not params['train']:
                agent1.epsilon = 0
                agent2.epsilon = 0
            else:
                # agent.epsilon is set to give randomness to actions
                agent1.epsilon = 1 - (counter_games * params['epsilon_decay_linear'])
                agent2.epsilon = 1 - (counter_games * params['epsilon_decay_linear'])
            # get old state
            state_old1 = agent1.get_state(game, player1, player2)
            state_old2 = agent2.get_state(game, player2, player1)
            # perform random actions based on agent.epsilon, or choose the action
            if randint(0, 1) < agent1.epsilon:
                final_move1 = to_categorical(randint(0, 2), num_classes=3)
            else:
                # predict action based on the old state
                prediction1 = agent1.model.predict(state_old1.reshape((1, 7)))
                final_move1 = to_categorical(np.argmax(prediction1[0]), num_classes=3)
            if randint(0, 1) < agent2.epsilon:
                final_move2 = to_categorical(randint(0, 2), num_classes=3)
            else:
                # predict action based on the old state
                prediction2 = agent2.model.predict(state_old2.reshape((1, 7)))
                final_move2 = to_categorical(np.argmax(prediction2[0]), num_classes=3)
            # perform new move and get new state
            player1.do_move(final_move1, player1.x, player1.y, game, agent1, player2)
            player2.do_move(final_move2, player2.x, player2.y, game, agent2, player1)
            state_new1 = agent1.get_state(game, player1, player2)
            state_new2 = agent2.get_state(game, player2, player1)
            # set reward for the new state
            reward1 = agent1.set_reward(player1, player1.crash)
            reward2 = agent2.set_reward(player2, player2.crash)
            if params['train']:
                # train short memory based on the new action and state
                agent1.train_short_memory(state_old1, final_move1, reward1, state_new1, player1.crash)
                # store the new data into long-term memory
                agent1.remember(state_old1, final_move1, reward1, state_new1, player1.crash)
                agent2.train_short_memory(state_old2, final_move2, reward2, state_new2, player2.crash)
                # store the new data into long-term memory
                agent2.remember(state_old2, final_move2, reward2, state_new2, player2.crash)
            game.score1 += reward1
            game.score2 += reward2
            record1 = get_record(game.score1, record1)
            record2 = get_record(game.score2, record2)
            if display_option:
                display(player1, player2, game, record1, record2)
                pygame.time.wait(speed)
            if player1.crash and player2.crash:
                game.crash = True
        counter_games += 1
        game.crash = False
        player1.crash = False
        player2.crash = False
        print("score1: " + str(game.score1) + "\n")
        print("score2: " + str(game.score2) + "\n")
        if params['train'] and counter_games % 10 == 0:
            agent1.model.save_weights(params['weights_path1'])
            agent2.model.save_weights(params['weights_path2'])
            print("weights saved")
    if params['train']:
        agent1.model.save_weights(params['weights_path1'])
        agent2.model.save_weights(params['weights_path2'])
def train(epoch=10):
    pygame.init()
    agent = DQNAgent(output_dim=3)
    counter_games = 0
    score_plot = []
    counter_plot = []
    record = 0
    while counter_games < epoch:
        # Initialize classes
        game = Game(440, 440)
        player1 = game.player
        field0 = game.field
        # Perform first move
        initialize_game(player1, game, field0, agent)
        if display_option:
            display(player1, field0, game, record)
        game_epoch = 0
        while not game.crash:
            # agent.epsilon is set to give randomness to actions
            agent.epsilon = 50 - game_epoch
            # get old state
            state_old = agent.get_state(game, player1, field0)
            # perform random actions based on agent.epsilon, or choose the action
            if randint(0, 100) < agent.epsilon:
                final_move = randint(0, 2)
                # print("random with prob {}".format(agent.epsilon))
            else:
                # predict action based on the old state
                prediction = agent.model.predict(state_old)
                final_move = np.argmax(prediction[0])
                print("prediction : {}".format(prediction))
            # print("move: {} to position ({}, {})".format(final_move, player1.x, player1.y))
            # perform new move and get new state
            player1.do_move(final_move, field0, game)
            if game_epoch >= 19:
                # get new state
                state_new = agent.get_state(game, player1, field0)
                # set reward for the new state
                reward = agent.set_reward(player1, game.crash, final_move)
                # train short memory based on the new action and state
                agent.train_short_memory(state_old, final_move, reward, state_new, game.crash)
                # store the new data into long-term memory
                if_remember = False
                if game.crash:
                    agent.remember(state_old, final_move, reward, state_new, game.crash)
                    if_remember = True
                    # print("remember this move with reward {}".format(reward))
                elif final_move == 0 and randint(1, 20) < 20:
                    agent.remember(state_old, final_move, reward, state_new, game.crash)
                    if_remember = True
                    # print("remember this move with reward {}".format(reward))
                elif final_move != 0 and randint(1, 20) < 20:
                    agent.remember(state_old, final_move, reward, state_new, game.crash)
                    if_remember = True
                    # print("remember this move with reward {}".format(reward))
                print("actual move {} to ({}, {}) gets reward {} - remember {}".format(final_move, player1.x, player1.y, reward, if_remember))
                # explore the other moves
                if final_move == 0:  # no move
                    # 1 left
                    explore_moves(game, field0, agent, player1, state_old, 1, max(0, player1.x - 1), player1.y)
                    # 2 right
                    explore_moves(game, field0, agent, player1, state_old, 2, min(player1.x + 1, 21), player1.y)
                elif final_move == 1:  # left
                    # 0 no move
                    explore_moves(game, field0, agent, player1, state_old, 0, min(player1.x + 1, 21), player1.y)
                    # 2 right
                    explore_moves(game, field0, agent, player1, state_old, 2, min(player1.x + 2, 21), player1.y)
                elif final_move == 2:  # right
                    # 0 no move
                    explore_moves(game, field0, agent, player1, state_old, 0, max(0, player1.x - 1), player1.y)
                    # 1 left
                    explore_moves(game, field0, agent, player1, state_old, 1, max(0, player1.x - 2), player1.y)
            record = get_record(game.score, record)
            if display_option:
                display(player1, field0, game, record)
                pygame.time.wait(speed)
            game_epoch += 1
        agent.replay_new(agent.memory)
        counter_games += 1
        print('Game', counter_games, ' Score:', game.score)
        score_plot.append(game.score)
        counter_plot.append(counter_games)
        if game.score >= record:
            agent.model.save_weights(modelFile + '/weights.hdf5')
    agent.model.save_weights(modelFile + '/weightsFinal.hdf5')
    plot_seaborn(counter_plot, score_plot)
def run(display_option, speed, params):
    pygame.init()
    agent = DQNAgent(params)
    weights_filepath = params['weights_path']
    if params['load_weights']:
        agent.model.load_weights(weights_filepath)
        print("weights loaded")
    counter_games = 0
    score_plot = []
    counter_plot = []
    record = 0
    while counter_games < params['episodes']:
        logger.info("===========================")
        logger.info(f"{info_string}")
        time_start = time.time()
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                pygame.quit()
                quit()
        # Initialize classes
        game = Game(440, 440)
        player1 = game.player
        food1 = game.food
        # Perform first move
        initialize_game(player1, game, food1, agent, params['batch_size'])
        if display_option == True:
            display(player1, food1, game, record)
        time_start_game_update = time.time()
        while not game.crash:
            time_start_game_update_pygame = time.time()
            if not params['train']:
                agent.epsilon = 0
            else:
                # agent.epsilon is set to give randomness to actions
                if agent.epsilon <= params['min_epsilon']:
                    agent.epsilon = params['min_epsilon']
                else:
                    agent.epsilon = 1 - (counter_games * params['epsilon_decay_linear'])
            # get old state
            state_old, vision = agent.get_state(game, player1, food1)
            # perform random actions based on agent.epsilon, or choose the action
            if randint(0, 1) < agent.epsilon:
                final_move = to_categorical(randint(0, 2), num_classes=3)
            else:
                # predict action based on the old state
                prediction = agent.model.predict(state_old.reshape((1, params['num_input_features'])))
                final_move = to_categorical(np.argmax(prediction[0]), num_classes=3)
            # perform new move and get new state
            player1.do_move(final_move, player1.x, player1.y, game, food1, agent)
            state_new, vision = agent.get_state(game, player1, food1)
            # set reward for the new state
            reward = agent.set_reward(player1, game.crash)
            time_end_game_update_pygame = time.time()
            time_start_game_update_train = time.time()
            if params['train']:
                # train short memory based on the new action and state
                agent.train_short_memory(state_old, final_move, reward, state_new, game.crash)
                # store the new data into long-term memory
                agent.remember(state_old, final_move, reward, state_new, game.crash)
            time_end_game_update_train = time.time()
            time_start_game_update_record = time.time()
            record = get_record(game.score, record)
            time_end_game_update_record = time.time()
            logger.debug("Pygame update step: " + str(time_end_game_update_pygame - time_start_game_update_pygame))
            logger.debug("Train short term update step: " + str(time_end_game_update_train - time_start_game_update_train))
            logger.debug("Record score step: " + str(time_end_game_update_record - time_start_game_update_record))
            if display_option == True:
                cv2.imshow("Vision of the Snake", vision * 255.0)
                # detect any keypresses
                key = cv2.waitKey(1) & 0xFF
                # if the `q` key was pressed, break from the loop
                if key == ord("q"):
                    break
                display(player1, food1, game, record)
                pygame.time.wait(speed)
            # # Pause visualisation if crash
            # if display_option == True:
            #     cv2.imshow("Vision of the Snake", vision * 255.0)
            #     # detect any keypresses
            #     key = cv2.waitKey(1) & 0xFF
            #     # if the `q` key was pressed, break from the loop
            #     if key == ord("q"):
            #         break
            #     display(player1, food1, game, record)
            #     pygame.time.wait(5000)
        time_end_game_update = time.time()
        logger.info("Time to play one game: " + str(round(time_end_game_update - time_start_game_update, 3)))
        time_start_long_term = time.time()
        if params['train']:
            agent.replay_new(agent.memory, params['batch_size'])
        time_end_long_term = time.time()
        logger.info("Train long term update step: " + str(round(time_end_long_term - time_start_long_term, 3)))
        if agent.epsilon <= params['min_epsilon']:
            agent.epsilon = params['min_epsilon']
        else:
            agent.epsilon = 1 - (counter_games * params['epsilon_decay_linear'])
        logger.info(f'The epsilon value is: {agent.epsilon}')
        logger.debug("===========================")
        counter_games += 1
        logger.info(f'Game {counter_games} Score: {game.score}')
        logger.info(f'The agent memory length is: {len(agent.memory)}')
        score_plot.append(game.score)
        counter_plot.append(counter_games)
        if params['train'] and counter_games % 100 == 0:
            agent.model.save_weights(params['weights_path'])
            logger.info("===========SAVING THE MODEL================")
            with open(params['memory_path'], 'wb') as handle:
                pickle.dump(agent.memory, handle)
        logger.info("End Game Loop")
        time_end = time.time()
        epoch_timer = round(time_end - time_start, 3)
        logger.info(f"One epoch takes: {epoch_timer} seconds")
        eta_prediction = round((params['episodes'] - counter_games) * epoch_timer / 60)
        logger.info(f"Time remaining is: {eta_prediction} minutes")
    if params['train']:
        agent.model.save_weights(params['weights_path'])
        with open(params['memory_path'], 'wb') as handle:
            pickle.dump(agent.memory, handle)
    params['counter_plot'] = counter_plot
    params['score_plot'] = score_plot
    with open(params['params_path'], 'wb') as handle:
        pickle.dump(params, handle)