rewards.append(episode_reward)
num_episodes += 1
agent.end_episode()

print("\nTrain Epoch {:02d}: Epsilon {:0.4f} | Avg. Loss {:0.3f} | Avg. Reward {:0.3f}".format(
    epoch, epsilon, np.mean(losses), np.sum(rewards) / num_episodes))

steps, num_episodes = 0, 0
losses, rewards = [], []

# display the screen
env.display_screen = True

# slow it down so we can watch it fail!
env.force_fps = False

# testing loop
while steps < num_steps_test:
    episode_reward = 0.0
    agent.start_episode()

    while not env.game_over() and steps < num_steps_test:
        state = env.getGameState()
        reward, action = agent.act(state, epsilon=0.05)
        episode_reward += reward
        steps += 1

        # done watching after 500 steps: hide the screen and run at full speed
        if steps > 500:
            env.display_screen = False
            env.force_fps = True
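# ---------------------------------------------------------------------------
# Hypothetical sketch (an assumption, not from the source): the train/test
# loop above expects an agent that wraps the environment with roughly this
# interface. The class name, the epsilon-greedy split, and the greedy branch
# are all illustrative placeholders.
# ---------------------------------------------------------------------------
import numpy as np

class SketchAgent(object):
    def __init__(self, env, actions):
        self.env = env
        self.actions = actions

    def start_episode(self):
        # per-episode bookkeeping would go here (e.g. reset traces/buffers)
        pass

    def end_episode(self):
        # e.g. flush collected transitions into a replay memory
        pass

    def act(self, state, epsilon=1.0):
        # epsilon-greedy: explore with probability epsilon, else exploit
        if np.random.rand() < epsilon:
            action = self.actions[np.random.randint(len(self.actions))]
        else:
            action = self.actions[0]  # placeholder for a greedy model lookup
        reward = self.env.act(action)  # PLE returns the reward for this step
        return reward, action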
env = PLE(game, fps=fps, frame_skip=frame_skip, num_steps=num_steps,
          force_fps=force_fps, display_screen=display_screen)

# our Naive agent!
agent = NaiveAgent(env.getActionSet())

# init agent and game.
env.init()

# let's do a random number of NOOPs
reward = 0.0
for i in range(np.random.randint(0, max_noops)):
    reward = env.act(env.NOOP)

# start our training loop
for f in range(nb_frames):
    # if the game is over
    if env.game_over():
        env.reset_game()

    obs = env.getScreenRGB()
    action = agent.pickAction(reward, obs)
    reward = env.act(action)

    # if f % 50 == 0:
    #     env.saveScreen("tmp/screen_capture.png")
    print(f)
    if f > 50:
        env.display_screen = True
        env.force_fps = True
    def pickAction(self, reward, obs):
        # with the flatten() preprocessor below, obs is a 1-element object
        # array whose single entry is the raw game-state dict
        player_y = obs[0]["player_y"]
        print("player y={}".format(player_y), obs[0])
        if player_y > 150:
            return self.actions[0]  # flap once the bird falls below y=150
        else:
            return None  # None is a no-op in PLE
        # return self.actions[np.random.randint(0, len(self.actions))]

###################################
game = flappybird.FlappyBird()
env = PLE(game, state_preprocessor=lambda x: np.array(x).flatten())
agent = NaiveAgent(env.getActionSet())
env.init()
env.display_screen = True
env.force_fps = False

reward = 0.0
for f in range(15000):
    # if the game is over
    if env.game_over():
        env.reset_game()

    action = agent.pickAction(reward, env.getGameState())
    reward = env.act(action)

    # if f > 2000:
    #     env.force_fps = False
    # if f > 2250:
    #     env.display_screen = True
p = PLE(game, fps=fps, frame_skip=frame_skip, num_steps=num_steps,
        force_fps=force_fps, display_screen=display_screen)

### SET PLAYER ###
# pick one of: RandomPlayer, HardCodedPlayer, LearningPlayer
agent = LearningPlayer(p.getActionSet())

p.init()

reward = 0.0
for i in range(nb_frames):
    if p.game_over():
        p.reset_game()
        agent.over()

    obs = p.getScreenRGB()
    action = agent.pickAction(reward, obs)
    reward = p.act(action)  # gain in score since the last frame

    # print(i)
    if ((i + 1) % 5000) == 0:
        print(i + 1)
        print("Average score: " + str(agent.totalscore / agent.runs))
    if ((i + 1) % 100000) == 0:
        print("Slow")
        print("Average score: " + str(agent.totalscore / agent.runs))
        p.force_fps = False  # slow down so we can watch
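# ---------------------------------------------------------------------------
# Hypothetical sketch (an assumption, not from the source): a minimal player
# exposing the interface the loop above relies on -- pickAction(), over(),
# and the totalscore/runs counters used for the running average. RandomPlayer
# is named in the comment above; this body is illustrative.
# ---------------------------------------------------------------------------
import numpy as np

class RandomPlayer(object):
    def __init__(self, actions):
        self.actions = actions
        self.totalscore = 0.0  # reward summed over finished runs
        self.runs = 1          # avoids a zero division before the first game over
        self.score = 0.0       # reward accumulated in the current run

    def pickAction(self, reward, obs):
        self.score += reward
        return self.actions[np.random.randint(len(self.actions))]

    def over(self):
        # called on game over: bank this run's score and start a fresh one
        self.totalscore += self.score
        self.runs += 1
        self.score = 0.0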
""" def __init__(self, actions): self.actions = actions def pickAction(self, reward, obs): return self.actions[np.random.randint(0, len(self.actions))] ################################### game = Doom(scenario="take_cover") env = PLE(game) agent = NaiveAgent(env.getActionSet()) env.init() reward = 0.0 for f in range(15000): #if the game is over if env.game_over(): env.reset_game() action = agent.pickAction(reward, env.getScreenRGB()) reward = env.act(action) if f > 2000: env.display_screen = True env.force_fps = False if f > 2250: env.display_screen = True env.force_fps = True
import matplotlib.pyplot as plt
from ple import PLE
from ple.games.flappybird import FlappyBird

def run_game(nb_episodes, agent):
    reward_values = agent.reward_values()
    env = PLE(FlappyBird(), fps=30, display_screen=False, force_fps=True,
              rng=None, reward_values=reward_values)
    env.init()

    maxScore = 0                 # high score
    score = 0                    # current score
    test = 0                     # testing episodes left
    frames = 0                   # frame counter
    acScore = 0                  # score accumulated over training
    testAcScore = 0              # score accumulated during testing
    trainingEpisodes = 100       # episodes to train before each test
    testingEpisodes = 10         # testing episodes in each test
    avgScore = 0                 # average score
    avgScoresArray = []          # average scores for the plot
    framesArray = []             # frame counts for the plot
    runs = nb_episodes           # total episodes requested (nb_episodes counts down)

    while nb_episodes > 0:
        # start by discretizing the state and calling the policy
        state = agent.discretize_state(env.game.getGameState())
        if test > 0:
            action = agent.policy(state)
        else:
            action = agent.training_policy(state)

        # now we have a state-action pair; use the action to act on the environment
        reward = env.act(env.getActionSet()[action])

        # plotting
        if frames % 1000 == 0 and frames != 0:
            avgScore = acScore / (runs - nb_episodes + 1)
            avgScoresArray.append(avgScore)
            framesArray.append(frames)
            plt.plot(framesArray, avgScoresArray)
            plt.savefig(agent.filename)
        frames += 1

        if reward > 0:
            score += reward
            acScore += reward
            testAcScore += reward

            # this bird got far, let's watch it
            if score == 2000:
                env.display_screen = True
                env.force_fps = False

            # the bird is pretty good; report every 1000 score so we roughly know how it's doing
            if score % 1000 == 0 and score != 0:
                print('episode:', (runs - nb_episodes), 'Big score', score)

        statePrime = agent.discretize_state(env.game.getGameState())

        # don't update while testing
        if test <= 0:
            agent.observe(state, action, reward, statePrime, env.game_over())

        if env.game_over():
            if (runs - nb_episodes) % trainingEpisodes == (trainingEpisodes - 1):
                # uncomment to watch how it is doing while testing
                # env.display_screen = True
                # env.force_fps = False
                test = testingEpisodes
                print('State space:', len(agent.q))
                testAcScore = 0
                # decrease the learning rate over time
                agent.learning_rate /= 2
            elif test > 0:
                test -= 1
            else:
                env.display_screen = False
                env.force_fps = True

            # new high score
            if score > maxScore:
                maxScore = score
                print("Highscore:", maxScore)

            if test > 0:
                avgScore = testAcScore / ((testingEpisodes + 1) - test)
                print("Highscore:", maxScore, "Average:", format(avgScore, '.3f'),
                      "Frame:", frames, "Episode:", runs - nb_episodes + 1, " Score:", score)

            if frames == 1000000:
                print("*" * 60 + "\nFrame limit reached\n" + "*" * 60)

            env.reset_game()
            nb_episodes -= 1
            score = 0
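# ---------------------------------------------------------------------------
# Hypothetical sketch (an assumption, not from the source): run_game above
# assumes a tabular Q-learning agent shaped roughly like this. The class name,
# discretization grid, reward shaping, and hyperparameters are illustrative.
# ---------------------------------------------------------------------------
import numpy as np

class QLearningAgent(object):
    def __init__(self, filename="qlearning.png"):
        self.q = {}                 # state -> list of Q-values, one per action
        self.learning_rate = 0.1
        self.discount = 0.95
        self.epsilon = 0.1
        self.filename = filename    # where run_game saves its plot

    def reward_values(self):
        # PLE lets us override the rewards it hands back
        return {"positive": 1.0, "tick": 0.0, "loss": -5.0}

    def discretize_state(self, state):
        # bucket the continuous FlappyBird state into a small hashable key
        return (int(state["player_y"] // 15),
                int(state["next_pipe_top_y"] // 15),
                int(state["next_pipe_dist_to_player"] // 15),
                int(state["player_vel"]))

    def policy(self, state):
        # greedy action, used during testing
        return int(np.argmax(self.q.get(state, [0.0, 0.0])))

    def training_policy(self, state):
        # epsilon-greedy action, used during training
        if np.random.rand() < self.epsilon:
            return np.random.randint(2)
        return self.policy(state)

    def observe(self, s, a, r, s2, terminal):
        # one-step Q-learning update
        qs = self.q.setdefault(s, [0.0, 0.0])
        target = r if terminal else r + self.discount * max(self.q.get(s2, [0.0, 0.0]))
        qs[a] += self.learning_rate * (target - qs[a])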
# Create a game instance
game = FlappyBird()

# Pass the game instance to the PLE
p = PLE(game)

# Create the agent
agent = NaiveAgent(p.getActionSet())

# Initialize the environment
p.init()

# Map discrete indices onto PLE's action set (currently unused below)
actions = p.getActionSet()
action_dict = {0: actions[1], 1: actions[0]}

reward = 0.0
for f in range(15000):
    # If the game is over
    if p.game_over():
        p.reset_game()

    action = agent.pickAction(reward, p.getScreenRGB())
    reward = p.act(action)

    if f > 1000:
        p.display_screen = True
        p.force_fps = False  # Slow the screen so we can watch
# inside a loop over the evolved genomes: keep the fittest one
# ("maior" = Portuguese for "largest/best")
if i.fitness > maior.fitness:
    maior = i

config = neat.Config(neat.DefaultGenome, neat.DefaultReproduction,
                     neat.DefaultSpeciesSet, neat.DefaultStagnation,
                     config_path)
maior = neat.nn.FeedForwardNetwork.create(maior, config)

Score = 0
app = QApplication([])
window = QMainWindow()
lcd = Ui_MainWindow()
lcd.setupUi(window)
window.show()  # show the score display

ENV.display_screen = True
ENV.force_fps = False

while True:
    State1 = ENV.game.getGameState()
    INP1 = State1["player_y"]
    INP2 = State1["next_pipe_bottom_y"]
    INP3 = (-State1["next_pipe_top_y"] + State1["next_pipe_bottom_y"]) / 2
    OUTPUT = maior.activate((INP1, INP2, INP3))
    # 119 is pygame's K_w, FlappyBird's flap action; None is a no-op
    VAL = 119 if OUTPUT[0] >= 0.4 else None
    RESP = ENV.act(VAL)
    if RESP > 0:
        Score += 1
        lcd.Adicionar_Score(Score)
    app.processEvents()  # let Qt repaint the score window inside the busy loop
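# ---------------------------------------------------------------------------
# Hypothetical sketch (an assumption, not from the source): the fittest-genome
# fragment above normally sits in a loop over a neat-python population, e.g.
# one restored from a checkpoint. The checkpoint filename is illustrative.
# ---------------------------------------------------------------------------
import neat

pop = neat.Checkpointer.restore_checkpoint("neat-checkpoint-49")  # assumed path
genomes = list(pop.population.values())
maior = genomes[0]
for i in genomes:
    # unevaluated genomes have fitness None; skip them
    if i.fitness is not None and (maior.fitness is None or i.fitness > maior.fitness):
        maior = i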