# Naive Flappy Bird agent: flap whenever the bird drops below a fixed height.
import numpy as np

from ple import PLE
from ple.games import flappybird


class NaiveAgent():
    def __init__(self, actions):
        self.actions = actions

    def pickAction(self, reward, obs=None):
        # obs is the preprocessed game state; index 0 holds the raw state dict
        player_y = obs[0]["player_y"]
        print("player y={}".format(player_y), obs[0])
        if player_y > 150:
            return self.actions[0]  # flap
        else:
            return None             # do nothing
        # return self.actions[np.random.randint(0, len(self.actions))]

###################################
game = flappybird.FlappyBird()
env = PLE(game, state_preprocessor=lambda x: np.array(x).flatten())
agent = NaiveAgent(env.getActionSet())
env.init()
env.display_screen = True
env.force_fps = False

reward = 0.0
for f in range(15000):
    # if the game is over, restart it
    if env.game_over():
        env.reset_game()

    action = agent.pickAction(reward, env.getGameState())
    reward = env.act(action)
    # if f > 2000:
    #     env.force_fps = False
    # if f > 2250:
# Evaluate any individual whose fitness has not been computed yet.
# Each entry of `pop` is assumed to be a [genome, fitness] pair; the loop header
# below is reconstructed, since the original snippet starts inside it.
for tuple in pop:  # note: `tuple` shadows the builtin name, kept from the original
    if tuple[1] == -1:
        tuple[1] = fitFuncWrapper(tuple[0])

# sort the population by fitness value, best first
pop.sort(key=lambda tup: tup[1], reverse=True)

print("Fit values for this generation: ", end="")
for i in range(0, len(pop)):
    print(pop[i][1], end=" ")
print("")

# show the best individual every so often
# p.display_screen = True
bestFit = pop[0][1]
if countShow > 100:
    p.display_screen = True
    fitFuncWrapper(pop[0][0])
    p.display_screen = False
    countShow = 0

# dump the best-fit genome to disk
dumpNeuralNet(pop[0][0])

# keep the N best genomes: remove the worst of this generation
pop.pop()

# breed a new individual from the two best genomes
firstBest = copy.deepcopy(pop[0][0])
secondBest = copy.deepcopy(pop[1][0])
newIndividual = procreate2(firstBest, secondBest)
for i in range(mutationRate):
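
# The crossover (`procreate2`) and mutation helpers are project-specific and not
# shown in this snippet. Purely as an illustration (not the author's code), a
# single-point crossover plus Gaussian mutation for genomes represented as flat
# lists of floats could look like this; the genome layout is an assumption.
import copy
import random


def crossover_sketch(parent_a, parent_b):
    # hypothetical helper: splice two flat float genomes at a random cut point
    cut = random.randint(1, len(parent_a) - 1)
    return copy.deepcopy(parent_a[:cut]) + copy.deepcopy(parent_b[cut:])


def mutate_sketch(genome, sigma=0.1, p=0.05):
    # hypothetical helper: jitter each weight with probability p
    return [w + random.gauss(0.0, sigma) if random.random() < p else w for w in genome]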
# DQN-style training loop for the RunningMinion game.
# Assumes the project-level helpers RunningMinion, process_state, init_agent,
# utils.ReplayMemory, plot_figure and save_agent are importable, and that
# numpy (as np), logging and ple.PLE are imported elsewhere.
def agent_training(agent_file_path, agent_file_name, fig_path, num_steps_train_total=5000):
    # training parameters
    num_epochs = 5
    num_steps_train_epoch = num_steps_train_total // num_epochs  # steps per epoch of training
    num_steps_test = 100
    update_frequency = 10   # step frequency of model training/updates

    epsilon = 0.15          # fraction of the time we take a random action, to help exploration
    epsilon_steps = 1000    # decay steps
    epsilon_min = 0.1
    epsilon_rate = (epsilon - epsilon_min) / epsilon_steps

    # memory settings
    max_memory_size = 10000
    min_memory_size = 60    # number of samples needed before model training starts

    game = RunningMinion()
    env = PLE(game, fps=30, display_screen=True, force_fps=True, state_preprocessor=process_state)
    my_agent = init_agent(env)
    memory = utils.ReplayMemory(max_memory_size, min_memory_size)
    env.init()

    # logging configuration and figure plotting
    logging.basicConfig(filename='../learning.log', filemode='w', level=logging.DEBUG,
                        format='%(levelname)s:%(message)s')
    logging.info('========================================================')
    logging.info('Training started for total training steps: ' + str(num_steps_train_total) + '.\n')

    learning_rewards = [0]
    testing_rewards = [0]

    for epoch in range(1, num_epochs + 1):
        steps, num_episodes = 0, 0
        losses, rewards = [], []
        env.display_screen = False

        # training loop
        while steps < num_steps_train_epoch:
            episode_reward = 0.0
            my_agent.start_episode()
            while not env.game_over() and steps < num_steps_train_epoch:
                state = env.getGameState()
                reward, action = my_agent.act(state, epsilon=epsilon)
                memory.add([state, action, reward, env.game_over()])

                if steps % update_frequency == 0:
                    loss = memory.train_agent_batch(my_agent)
                    if loss is not None:
                        losses.append(loss)
                        # decay epsilon (np.max(a, b) treats b as an axis, so use the builtin max)
                        epsilon = max(epsilon_min, epsilon - epsilon_rate)

                episode_reward += reward
                steps += 1

            if steps < num_steps_train_epoch:
                learning_rewards.append(episode_reward)
            if num_episodes % 5 == 0:
                logging.info("Episode {:01d}: Reward {:0.1f}".format(num_episodes, episode_reward))
            rewards.append(episode_reward)
            num_episodes += 1
            my_agent.end_episode()

        logging.info("Train Epoch {:02d}: Epsilon {:0.4f} | Avg. Loss {:0.3f} | Avg. Reward {:0.3f}\n"
                     .format(epoch, epsilon, np.mean(losses), np.sum(rewards) / num_episodes))

        steps, num_episodes = 0, 0
        losses, rewards = [], []

        # testing loop
        while steps < num_steps_test:
            episode_reward = 0.0
            my_agent.start_episode()
            while not env.game_over() and steps < num_steps_test:
                state = env.getGameState()
                reward, action = my_agent.act(state, epsilon=0.05)
                episode_reward += reward
                testing_rewards.append(testing_rewards[-1] + reward)
                steps += 1

                # done watching after 500 steps
                if steps > 500:
                    env.display_screen = False

            if num_episodes % 5 == 0:
                logging.info("Episode {:01d}: Reward {:0.1f}".format(num_episodes, episode_reward))
            if steps < num_steps_test:
                testing_rewards.append(episode_reward)
            rewards.append(episode_reward)
            num_episodes += 1
            my_agent.end_episode()

        logging.info("Test Epoch {:02d}: Best Reward {:0.3f} | Avg. Reward {:0.3f}\n"
                     .format(epoch, np.max(rewards), np.sum(rewards) / num_episodes))

    logging.info("Training complete.\n\n")
    plot_figure(fig_path, learning_rewards, 'reward', 'reward_in_training', num_steps_train_total)
    plot_figure(fig_path, testing_rewards, 'reward', 'reward_in_testing', num_steps_train_total)
    save_agent(my_agent, agent_file_path, agent_file_name)
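
# `process_state` is passed to PLE as the state_preprocessor but is not defined in
# this snippet. A minimal sketch of what it plausibly does (flatten the game-state
# dict into a numeric vector); the key ordering and dtype are assumptions.
import numpy as np


def process_state(state):
    # state is the dict returned by game.getGameState()
    return np.array(list(state.values()), dtype=np.float32)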
""" def __init__(self, actions): self.actions = actions def pickAction(self, reward, obs): return self.actions[np.random.randint(0, len(self.actions))] ################################### game = Doom(scenario="take_cover") env = PLE(game) agent = NaiveAgent(env.getActionSet()) env.init() reward = 0.0 for f in range(15000): #if the game is over if env.game_over(): env.reset_game() action = agent.pickAction(reward, env.getScreenRGB()) reward = env.act(action) if f > 2000: env.display_screen = True env.force_fps = False if f > 2250: env.display_screen = True env.force_fps = True
# PLE takes our game and the state_preprocessor. It will process the state for our agent.
# The Agent class, ReplayMemory, nv_state_preprocessor and the hyperparameters
# (batch_size, num_frames, frame_skip, lr, discount, rng, num_epochs, num_steps_train,
# epsilon, update_frequency, max_memory_size, min_memory_size) are assumed to be
# defined earlier in the original script.
game = Catcher(width=128, height=128)
env = PLE(game, fps=60, state_preprocessor=nv_state_preprocessor)

agent = Agent(env, batch_size, num_frames, frame_skip, lr,
              discount, rng, optimizer="sgd_nesterov")
agent.build_model()

memory = ReplayMemory(max_memory_size, min_memory_size)

env.init()

for epoch in range(1, num_epochs + 1):
    steps, num_episodes = 0, 0
    losses, rewards = [], []
    env.display_screen = False

    # training loop
    while steps < num_steps_train:
        episode_reward = 0.0
        agent.start_episode()

        while not env.game_over() and steps < num_steps_train:
            state = env.getGameState()
            reward, action = agent.act(state, epsilon=epsilon)
            memory.add([state, action, reward, env.game_over()])

            if steps % update_frequency == 0:
                loss = memory.train_agent_batch(agent)
                if loss is not None:
                    losses.append(loss)
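
# ReplayMemory here is a helper from the original project. A minimal sketch of the
# interface the loop above relies on (add() and train_agent_batch()); the sampling
# strategy and the agent.train(batch) call are assumptions, not the original code.
import random
from collections import deque


class ReplayMemorySketch(object):
    def __init__(self, max_size, min_size, batch_size=32):
        self.min_size = min_size
        self.batch_size = batch_size
        self.memory = deque(maxlen=max_size)

    def add(self, transition):
        # transition = [state, action, reward, terminal]
        self.memory.append(transition)

    def train_agent_batch(self, agent):
        # do nothing until we have enough samples to form a batch
        if len(self.memory) < self.min_size:
            return None
        batch = random.sample(self.memory, self.batch_size)
        # assumes the agent exposes a train(batch) method that returns a loss value
        return agent.train(batch)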
# Tabular Q-learning training/evaluation loop for Flappy Bird.
import matplotlib.pyplot as plt
from ple import PLE
from ple.games.flappybird import FlappyBird


def run_game(nb_episodes, agent):
    reward_values = agent.reward_values()
    env = PLE(FlappyBird(), fps=30, display_screen=False, force_fps=True, rng=None,
              reward_values=reward_values)
    env.init()

    runs = nb_episodes        # Total episodes requested (used to count completed episodes)
    maxScore = 0              # Highscore
    score = 0                 # Current score
    test = 0                  # Number of test episodes left
    frames = 0                # Frame counter
    acScore = 0               # Score accumulated
    testAcScore = 0           # Score accumulated for testing
    trainingEpisodes = 100    # Number of episodes to train before testing
    testingEpisodes = 10      # Number of testing episodes in each test
    avgScore = 0              # Average score
    avgScoresArray = []       # Average scores for the plot
    framesArray = []          # Frames for the plot

    while nb_episodes > 0:
        action = 0
        # start by discretizing the state and calling the policy
        state = agent.discretize_state(env.game.getGameState())
        if test > 0:
            action = agent.policy(state)
        else:
            action = agent.training_policy(state)

        # now that we have a state-action pair, use the action to act on the environment
        reward = env.act(env.getActionSet()[action])

        # plotting
        if frames % 1000 == 0 and frames != 0:
            avgScore = acScore / (runs - nb_episodes + 1)
            avgScoresArray.append(avgScore)
            framesArray.append(frames)
            plt.plot(framesArray, avgScoresArray)
            plt.savefig(agent.filename)
        frames += 1

        if reward > 0:
            score += reward
            acScore += reward
            testAcScore += reward
            # this bird got far, let's watch it
            if score == 2000:
                env.display_screen = True
                env.force_fps = False
            # the bird is pretty good; report every 1000 points so we roughly know how it's doing
            if score % 1000 == 0 and score != 0:
                print('episode:', (runs - nb_episodes), 'Big score', score)

        statePrime = agent.discretize_state(env.game.getGameState())
        # don't update while testing
        if test <= 0:
            agent.observe(state, action, reward, statePrime, env.game_over())

        if env.game_over():
            if (runs - nb_episodes) % trainingEpisodes == (trainingEpisodes - 1):
                # uncomment to see how it is doing while testing
                # env.display_screen = True
                # env.force_fps = False
                test = testingEpisodes
                print('State space:', len(agent.q))
                testAcScore = 0
                # decrease learning rate over time
                agent.learning_rate /= 2
            elif test > 0:
                test -= 1
            else:
                env.display_screen = False
                env.force_fps = True

            # new highscore
            if score > maxScore:
                maxScore = score
                print("Highscore:", maxScore)

            if test > 0:
                avgScore = testAcScore / ((testingEpisodes + 1) - test)
                print("Highscore:", maxScore, "Average:", format(avgScore, '.3f'),
                      "Frame:", frames, "Episode:", runs - nb_episodes + 1, " Score:", score)

            if frames == 1000000:
                print("Frame limit reached")

            env.reset_game()
            nb_episodes -= 1
            score = 0
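
# `agent.discretize_state` belongs to the Q-learning agent, which is not shown here.
# Purely as an illustration (not the original implementation), a discretization that
# bins a few FlappyBird state variables into coarse buckets might look like this;
# the chosen variables and bucket size are assumptions.
def discretize_state_sketch(game_state, bucket=15):
    player_y = int(game_state["player_y"]) // bucket
    pipe_top = int(game_state["next_pipe_top_y"]) // bucket
    pipe_dist = int(game_state["next_pipe_dist_to_player"]) // bucket
    velocity = int(game_state["player_vel"])
    return (player_y, pipe_top, pipe_dist, velocity)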
# pick the fittest genome ("maior") from the population; the surrounding loop over
# the evaluated genomes is assumed, since the original snippet starts inside it
if i.fitness > maior.fitness:
    maior = i

config = neat.Config(neat.DefaultGenome, neat.DefaultReproduction,
                     neat.DefaultSpeciesSet, neat.DefaultStagnation,
                     config_path)

# build a feed-forward network from the best genome
maior = neat.nn.FeedForwardNetwork.create(maior, config)

Score = 0
app = QApplication([])
window = QMainWindow()
lcd = Ui_MainWindow()
lcd.setupUi(window)

while True:
    State1 = ENV.game.getGameState()
    INP1 = State1["player_y"]
    INP2 = State1["next_pipe_bottom_y"]
    INP3 = (-State1["next_pipe_top_y"] + State1["next_pipe_bottom_y"]) / 2

    OUTPUT = maior.activate((INP1, INP2, INP3))
    VAL = 119 if OUTPUT[0] >= 0.4 else None  # 119 is pygame.K_w, the flap action in PLE's FlappyBird
    RESP = ENV.act(VAL)

    ENV.display_screen = True
    ENV.force_fps = False

    if RESP > 0:
        Score += 1
        lcd.Adicionar_Score(Score)
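
# ENV is created elsewhere in the original script. A minimal sketch of how it is
# presumably set up with PLE and FlappyBird (fps and display settings are assumptions):
from ple import PLE
from ple.games.flappybird import FlappyBird

ENV = PLE(FlappyBird(), fps=30, display_screen=True)
ENV.init()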
# Same training routine as above, but for the FlappyBird game.
# Assumes the same project-level helpers (process_state, init_agent, utils.ReplayMemory,
# plot_figure, save_agent) plus numpy (as np), logging and ple.PLE.
def agent_training(agent_file_path, agent_file_name, fig_path, num_steps_train_total=5000):
    # training parameters
    num_epochs = 5
    num_steps_train_epoch = num_steps_train_total // num_epochs  # steps per epoch of training
    num_steps_test = 100
    update_frequency = 10   # step frequency of model training/updates

    epsilon = 0.15          # fraction of the time we take a random action, to help exploration
    epsilon_steps = 1000    # decay steps
    epsilon_min = 0.1
    epsilon_rate = (epsilon - epsilon_min) / epsilon_steps

    # memory settings
    max_memory_size = 10000
    min_memory_size = 60    # number of samples needed before model training starts

    game = FlappyBird()
    env = PLE(game, fps=30, display_screen=True, force_fps=True, state_preprocessor=process_state)
    my_agent = init_agent(env)
    memory = utils.ReplayMemory(max_memory_size, min_memory_size)
    env.init()

    # logging configuration and figure plotting
    logging.basicConfig(filename='../learning.log', filemode='w', level=logging.DEBUG,
                        format='%(levelname)s:%(message)s')
    logging.info('========================================================')
    logging.info('Training started for total training steps: ' + str(num_steps_train_total) + '.\n')

    learning_rewards = [0]
    testing_rewards = [0]

    for epoch in range(1, num_epochs + 1):
        steps, num_episodes = 0, 0
        losses, rewards = [], []
        env.display_screen = False

        # training loop
        while steps < num_steps_train_epoch:
            episode_reward = 0.0
            my_agent.start_episode()
            while not env.game_over() and steps < num_steps_train_epoch:
                state = env.getGameState()
                reward, action = my_agent.act(state, epsilon=epsilon)
                memory.add([state, action, reward, env.game_over()])

                if steps % update_frequency == 0:
                    loss = memory.train_agent_batch(my_agent)
                    if loss is not None:
                        losses.append(loss)
                        # decay epsilon (np.max(a, b) treats b as an axis, so use the builtin max)
                        epsilon = max(epsilon_min, epsilon - epsilon_rate)

                episode_reward += reward
                steps += 1

            if steps < num_steps_train_epoch:
                learning_rewards.append(episode_reward)
            if num_episodes % 5 == 0:
                logging.info("Episode {:01d}: Reward {:0.1f}".format(num_episodes, episode_reward))
            rewards.append(episode_reward)
            num_episodes += 1
            my_agent.end_episode()

        logging.info("Train Epoch {:02d}: Epsilon {:0.4f} | Avg. Loss {:0.3f} | Avg. Reward {:0.3f}\n"
                     .format(epoch, epsilon, np.mean(losses), np.sum(rewards) / num_episodes))

        steps, num_episodes = 0, 0
        losses, rewards = [], []

        # display the screen
        # env.display_screen = True
        # slow it down so we can watch it fail!
        # env.force_fps = True

        # testing loop
        while steps < num_steps_test:
            episode_reward = 0.0
            my_agent.start_episode()
            while not env.game_over() and steps < num_steps_test:
                state = env.getGameState()
                reward, action = my_agent.act(state, epsilon=0.05)
                episode_reward += reward
                testing_rewards.append(testing_rewards[-1] + reward)
                steps += 1

                # done watching after 500 steps
                if steps > 500:
                    env.display_screen = False

            if num_episodes % 5 == 0:
                logging.info("Episode {:01d}: Reward {:0.1f}".format(num_episodes, episode_reward))
            if steps < num_steps_test:
                testing_rewards.append(episode_reward)
            rewards.append(episode_reward)
            num_episodes += 1
            my_agent.end_episode()

        logging.info("Test Epoch {:02d}: Best Reward {:0.3f} | Avg. Reward {:0.3f}\n"
                     .format(epoch, np.max(rewards), np.sum(rewards) / num_episodes))

    logging.info("Training complete.\n\n")
    plot_figure(fig_path, learning_rewards, 'reward', 'reward_in_training', num_steps_train_total)
    plot_figure(fig_path, testing_rewards, 'reward', 'reward_in_testing', num_steps_train_total)
    save_agent(my_agent, agent_file_path, agent_file_name)
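
# plot_figure and save_agent are project helpers that are not shown here. A minimal,
# hypothetical sketch of what plot_figure might do with matplotlib; the filename
# pattern and axis labels are assumptions.
import os
import matplotlib.pyplot as plt


def plot_figure_sketch(fig_path, values, ylabel, name, num_steps_train_total):
    plt.figure()
    plt.plot(values)
    plt.xlabel('step')
    plt.ylabel(ylabel)
    plt.title('{} ({} training steps)'.format(name, num_steps_train_total))
    plt.savefig(os.path.join(fig_path, name + '.png'))
    plt.close()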