Esempio n. 1
0
            rewards.append(episode_reward)
            num_episodes += 1
            agent.end_episode()

        print "\nTrain Epoch {:02d}: Epsilon {:0.4f} | Avg. Loss {:0.3f} | Avg. Reward {:0.3f}".format(
            epoch, epsilon, np.mean(losses),
            np.sum(rewards) / num_episodes)

        steps, num_episodes = 0, 0
        losses, rewards = [], []

        # display the screen
        env.display_screen = True

        # slow it down so we can watch it fail!
        env.force_fps = False

        # testing loop
        while steps < num_steps_test:
            episode_reward = 0.0
            agent.start_episode()

            while env.game_over() == False and steps < num_steps_test:
                state = env.getGameState()
                reward, action = agent.act(state, epsilon=0.05)

                episode_reward += reward
                steps += 1

                # done watching after 500 steps.
                if steps > 500:
          display_screen=display_screen)

# Create the naive agent from the environment's action set.
agent = NaiveAgent(env.getActionSet())

# Initialise the environment before the first call to act().
env.init()

# Start each run with a random number of NOOPs so that the agent does not
# always begin from exactly the same state.
for i in range(np.random.randint(0, max_noops)):
    reward = env.act(env.NOOP)

# Main loop: one agent decision per frame.
for f in range(nb_frames):
    # Restart the game as soon as the current episode is over.
    if env.game_over():
        env.reset_game()

    obs = env.getScreenRGB()
    action = agent.pickAction(reward, obs)
    reward = env.act(action)

    # if f % 50 == 0:
    #     p.saveScreen("tmp/screen_capture.png")

    # Call form of print: valid on Python 3 and prints the same text on
    # Python 2 (the bare `print f` statement is a SyntaxError on Python 3).
    print(f)

    if f > 50:
        env.display_screen = True
        env.force_fps = True
        player_y = obs[0]["player_y"]
        print("player y={}".format(player_y), obs[0])
        if player_y > 150:
            return self.actions[0]
        else:
            return None
        # return self.actions[np.random.randint(0, len(self.actions))]


###################################
# Build the FlappyBird environment. Every game state handed to the agent is
# first converted to a NumPy array and flattened.
game = flappybird.FlappyBird()
env = PLE(game, state_preprocessor=lambda x: np.array(x).flatten())
agent = NaiveAgent(env.getActionSet())
env.init()

# Show the game window and do not force the frame rate.
env.display_screen, env.force_fps = True, False

reward = 0.0
for f in range(15000):
    # A finished episode is restarted before the next decision is made.
    if env.game_over():
        env.reset_game()

    state = env.getGameState()
    action = agent.pickAction(reward, state)
    reward = env.act(action)

    # Disabled experiment kept from the original script:
    # if f > 2000:
    # env.force_fps = False

    # if f > 2250:
    # env.display_screen = True
# Wrap the game in a PLE instance using the run settings defined earlier
# in the script.
p = PLE(
    game,
    fps=fps,
    frame_skip=frame_skip,
    num_steps=num_steps,
    force_fps=force_fps,
    display_screen=display_screen,
)

### SET PLAYER ###
# Alternatives: RandomPlayer, HardCodedPlayer, LearningPlayer
agent = LearningPlayer(p.getActionSet())

p.init()

for i in range(nb_frames):
    # When an episode ends, restart the game and let the agent close it out.
    if p.game_over():
        p.reset_game()
        agent.over()

    obs = p.getScreenRGB()
    action = agent.pickAction(reward, obs)
    # act() returns the gain in score since the previous frame.
    reward = p.act(action)

    frames_done = i + 1

    # Progress report every 5000 frames.
    if frames_done % 5000 == 0:
        print(frames_done)
        print("Average score: " + str(agent.totalscore / agent.runs))

    # Every 100000 frames, report again and stop forcing the frame rate.
    if frames_done % 100000 == 0:
        print("Slow")
        print("Average score: " + str(agent.totalscore / agent.runs))
        p.force_fps = False
	"""
	def __init__(self, actions):
		"""Store the available actions so pickAction can choose among them."""
		self.actions = actions

	def pickAction(self, reward, obs):
		"""Return one of the stored actions, chosen uniformly at random.

		``reward`` and ``obs`` are accepted but not used.
		"""
		choice = np.random.randint(len(self.actions))
		return self.actions[choice]

###################################
# Doom "take_cover" scenario driven by the naive agent.
game = Doom(scenario="take_cover")

env = PLE(game)
agent = NaiveAgent(env.getActionSet())
env.init()

reward = 0.0
for f in range(15000):
    # Restart whenever the current episode has finished.
    if env.game_over():
        env.reset_game()

    action = agent.pickAction(reward, env.getScreenRGB())
    reward = env.act(action)

    # Past frame 2000 the screen is displayed with force_fps off; past
    # frame 2250 force_fps is switched back on.
    if f > 2000:
        env.display_screen = True
        env.force_fps = f > 2250
Esempio n. 6
0
def run_game(nb_episodes, agent):
    """Play ``nb_episodes`` FlappyBird episodes, alternating training and testing.

    The agent normally acts through ``agent.training_policy`` and is updated
    with ``agent.observe`` after every frame. When the episode whose index is
    99 modulo 100 ends, ``agent.learning_rate`` is halved and the next 10
    episodes are played with ``agent.policy`` and without updates. Every 1000
    frames the running average score is appended to a plot that is saved to
    ``agent.filename``.

    Args:
        nb_episodes: Number of episodes to play. The loop ends once that many
            episodes have finished.
        agent: Object providing ``reward_values()``, ``discretize_state()``,
            ``policy()``, ``training_policy()``, ``observe()``, ``q``,
            ``learning_rate`` and ``filename``.

    Returns:
        None. Progress is reported through ``print`` and the saved plot.
    """
    reward_values = agent.reward_values()
    env = PLE(FlappyBird(),
              fps=30,
              display_screen=False,
              force_fps=True,
              rng=None,
              reward_values=reward_values)
    env.init()

    # Total number of episodes requested. The original body referred to an
    # undefined name ``runs`` for this value, which raised NameError unless a
    # module-level global of that name happened to exist.
    runs = nb_episodes

    max_score = 0  # Highscore
    score = 0  # Score of the current episode
    test = 0  # Number of test episodes still to be played
    frames = 0  # Frame counter
    ac_score = 0  # Score accumulated over all episodes
    test_ac_score = 0  # Score accumulated during the current test batch
    training_episodes = 100  # Episodes to train before each test batch
    testing_episodes = 10  # Test episodes in each batch
    avg_scores = []  # Average scores for the plot
    frame_marks = []  # Frame counts for the plot

    while nb_episodes > 0:
        # nb_episodes is only decremented at the very end of an iteration, so
        # this count is valid for the whole loop body.
        episodes_done = runs - nb_episodes

        # Start by discretizing the state and calling the policy: greedy
        # while testing, exploratory while training.
        state = agent.discretize_state(env.game.getGameState())
        if test > 0:
            action = agent.policy(state)
        else:
            action = agent.training_policy(state)

        # Apply the chosen action to the environment.
        reward = env.act(env.getActionSet()[action])

        # Plotting: one data point every 1000 frames.
        if frames % 1000 == 0 and frames != 0:
            avg_score = ac_score / (episodes_done + 1)
            avg_scores.append(avg_score)
            frame_marks.append(frames)
            plt.plot(frame_marks, avg_scores)
            plt.savefig(agent.filename)

        frames += 1
        if reward > 0:
            score += reward
            ac_score += reward
            test_ac_score += reward

        # This bird got far, let's watch it.
        if score == 2000:
            env.display_screen = True
            env.force_fps = False
        # Report every multiple of 1000 to roughly track a strong run.
        if score % 1000 == 0 and score != 0:
            print('episode:', episodes_done, 'Big score', score)

        state_prime = agent.discretize_state(env.game.getGameState())

        # Do not update the agent while testing.
        if test <= 0:
            agent.observe(state, action, reward, state_prime, env.game_over())

        if env.game_over():
            if episodes_done % training_episodes == training_episodes - 1:
                # Uncomment to watch the agent while it is being tested.
                # env.display_screen = True
                # env.force_fps = False
                test = testing_episodes
                print('State space:', len(agent.q))
                test_ac_score = 0

                # Decrease the learning rate over time.
                agent.learning_rate /= 2
            elif test > 0:
                test -= 1
            else:
                env.display_screen = False
                env.force_fps = True

            # New highscore.
            if score > max_score:
                max_score = score
                print("Highscore:", max_score)
            if test > 0:
                # NOTE(review): the divisor is one larger than the number of
                # test episodes finished so far; kept as in the original.
                avg_score = test_ac_score / ((testing_episodes + 1) - test)
                print("Highscore:", max_score, "Average:",
                      format(avg_score, '.3f'), "Frame:", frames, "Episode:",
                      episodes_done + 1, " Score:", score)
            # NOTE(review): this only fires when an episode ends exactly on
            # frame 1000000 and does not stop the loop; kept as in the
            # original.
            if frames == 1000000:
                print(
                    "*****************************************************************************\nFrame limit reached\n**********************************************************"
                )
            env.reset_game()
            nb_episodes -= 1
            score = 0
Esempio n. 7
0
 
# Create the game and hand it to the PLE wrapper.
game = FlappyBird()
p = PLE(game)

# The agent chooses among the environment's actions.
agent = NaiveAgent(p.getActionSet())

# The environment must be initialised before it is stepped.
p.init()

# Index-to-action lookup with the first two actions swapped.
actions = p.getActionSet()
action_dict = {0: actions[1], 1: actions[0]}

reward = 0.0

for f in range(15000):
    # Start a new game once the current one has ended.
    if p.game_over():
        p.reset_game()

    frame = p.getScreenRGB()
    action = agent.pickAction(reward, frame)
    reward = p.act(action)

    # Nothing else to do up to and including frame 1000.
    if f <= 1000:
        continue

    # From then on, show the screen and do not force the frame rate.
    p.display_screen = True
    p.force_fps = False
        if (i.fitness > maior.fitness): maior = i

# Build the NEAT configuration from the file at config_path, using the
# library's default genome, reproduction, species-set and stagnation classes.
config = neat.Config(neat.DefaultGenome, neat.DefaultReproduction,
                     neat.DefaultSpeciesSet, neat.DefaultStagnation,
                     config_path)
# Replace the selected genome with the feed-forward network built from it.
maior = neat.nn.FeedForwardNetwork.create(maior, config)

# Number of positive rewards received so far.
Score = 0

# Qt application and main window; the generated UI object is set up on it.
app = QApplication([])
window = QMainWindow()
lcd = Ui_MainWindow()
lcd.setupUi(window)

# Drive the game with the network, one decision per iteration.
while (True):
    State1 = ENV.game.getGameState()

    # Network inputs: the player's y position, the next pipe's bottom y, and
    # half the difference between the next pipe's bottom and top y values.
    INP1 = State1["player_y"]
    INP2 = State1["next_pipe_bottom_y"]
    INP3 = (-State1["next_pipe_top_y"] + State1["next_pipe_bottom_y"]) / 2
    OUTPUT = maior.activate((INP1, INP2, INP3))

    # Send 119 when the first output reaches 0.4, otherwise None.
    # NOTE(review): 119 is presumably the key code of the flap action
    # (ASCII 'w') and None a no-op; confirm against ENV.getActionSet().
    VAL = 119 if OUTPUT[0] >= 0.4 else None
    RESP = ENV.act(VAL)
    # These two settings are re-applied on every iteration.
    ENV.display_screen = True
    ENV.force_fps = False

    # A positive reward counts as one point and is passed to the UI object.
    if (RESP > 0):
        Score += 1
        lcd.Adicionar_Score(Score)