def pickAction(self, reward, obs=None):
        # the state_preprocessor wraps the raw game-state dict in a 1-element array, so obs[0] is the dict
        player_y = obs[0]["player_y"]
        print("player y={}".format(player_y), obs[0])
        if player_y > 150:
            return self.actions[0]
        else:
            return None
        # return self.actions[np.random.randint(0, len(self.actions))]


###################################
game = flappybird.FlappyBird()
env = PLE(game, state_preprocessor=lambda x: np.array(x).flatten())
agent = NaiveAgent(env.getActionSet())
env.init()
env.display_screen = True
env.force_fps = False

reward = 0.0
for f in range(15000):
    #if the game is over
    if env.game_over():
        env.reset_game()

    action = agent.pickAction(reward, env.getGameState())
    reward = env.act(action)

    # if f > 2000:
    # env.force_fps = False

    # if f > 2250:
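The fixed player_y > 150 rule above is crude; a hand-coded policy that aims for the centre of the next pipe gap usually lasts longer. A sketch (GapCenterAgent is a hypothetical name; it assumes the same obs[0] state dict and that index 0 of the action set is the flap key, as in the snippet above):

class GapCenterAgent:
    """Hand-coded FlappyBird policy: flap whenever the bird drops below the next gap's centre."""

    def __init__(self, actions):
        self.actions = actions  # PLE's env.getActionSet(); index 0 is assumed to be the flap key

    def pickAction(self, reward, obs=None):
        state = obs[0]  # the lambda preprocessor wraps the raw state dict in a 1-element array
        gap_center = (state["next_pipe_top_y"] + state["next_pipe_bottom_y"]) / 2.0
        if state["player_y"] > gap_center:
            return self.actions[0]  # flap
        return None                 # no-op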
Example 2
                  num_frames,
                  frame_skip,
                  lr,
                  discount,
                  rng,
                  optimizer="sgd_nesterov")
    agent.build_model()

    memory = ReplayMemory(max_memory_size, min_memory_size)

    env.init()

    for epoch in range(1, num_epochs + 1):
        steps, num_episodes = 0, 0
        losses, rewards = [], []
        env.display_screen = False

        # training loop
        while steps < num_steps_train:
            episode_reward = 0.0
            agent.start_episode()

            while not env.game_over() and steps < num_steps_train:
                state = env.getGameState()
                reward, action = agent.act(state, epsilon=epsilon)
                memory.add([state, action, reward, env.game_over()])

                if steps % update_frequency == 0:
                    loss = memory.train_agent_batch(agent)

                    if loss is not None:
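Several of these snippets rely on a ReplayMemory with add(...) and train_agent_batch(agent) methods; a minimal sketch of that interface (the internals here are illustrative, not the original implementation):

import random
from collections import deque


class ReplayMemory:
    """Illustrative sketch: stores transitions and trains the agent on random minibatches."""

    def __init__(self, max_size, min_size, batch_size=32):
        self.memory = deque(maxlen=max_size)
        self.min_size = min_size        # do not train until this many transitions are stored
        self.batch_size = batch_size

    def add(self, transition):
        # transition is [state, action, reward, terminal], as in the snippets above
        self.memory.append(transition)

    def train_agent_batch(self, agent):
        if len(self.memory) < self.min_size:
            return None                 # matches the `if loss is not None:` checks above
        batch = random.sample(self.memory, min(self.batch_size, len(self.memory)))
        return agent.train(batch)       # assumes the agent exposes a train(batch) method returning the loss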
Example 3
        # evaluate any individual whose fitness has not yet been computed (-1 marks it as unevaluated)
        if tuple[1] == -1:
            tuple[1] = fitFuncWrapper(tuple[0])

    #sort list by fitness value
    pop.sort(key=lambda tup: tup[1], reverse=True)

    print("Fit values for this generation: "),
    for i in range(0, len(pop)):
        print(pop[i][1]),
    print("")
    #show best individual
    #p.display_screen=True
    bestFit = pop[0][1]

    if (countShow > 100):
        p.display_screen = True
        fitFuncWrapper(pop[0][0])
        p.display_screen = False
        countShow = 0
        #dump best fit!
        dumpNeuralNet(pop[0][0])

    #get N best genomes
    #remove the worst of this generation
    pop.pop()

    firstBest = copy.deepcopy(pop[0][0])
    secondBest = copy.deepcopy(pop[1][0])

    newIndividual = procreate2(firstBest, secondBest)
    for i in range(mutationRate):
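The snippet above leans on procreate2 (crossover) and a mutation step applied mutationRate times; neither is shown here. A rough sketch of what such helpers might look like if the genome were a flat list of numeric weights (purely illustrative; the original representation and implementations are not shown):

import copy
import random


def procreate2(parent_a, parent_b):
    """Uniform crossover over two equal-length weight lists (illustrative)."""
    child = copy.deepcopy(parent_a)
    for i in range(len(child)):
        if random.random() < 0.5:
            child[i] = parent_b[i]
    return child


def mutate(individual, sigma=0.1):
    """Perturb one randomly chosen weight with Gaussian noise (illustrative)."""
    i = random.randrange(len(individual))
    individual[i] += random.gauss(0.0, sigma)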
Example 4
def agent_training(agent_file_path, agent_file_name, fig_path, num_steps_train_total = 5000):
    # training parameters
    num_epochs = 5
    num_steps_train_epoch = num_steps_train_total // num_epochs  # steps per epoch of training
    num_steps_test = 100
    update_frequency = 10  # step frequency of model training/updates

    epsilon = 0.15  # probability of taking a random action, to encourage exploration
    epsilon_steps = 1000  # decay steps
    epsilon_min = 0.1
    epsilon_rate = (epsilon - epsilon_min) / epsilon_steps

    # memory settings
    max_memory_size = 10000
    min_memory_size = 60  # number needed before model training starts

    game = RunningMinion()
    env = PLE(game, fps=30, display_screen=True, force_fps=True, state_preprocessor=process_state)
    my_agent = init_agent(env)

    memory = utils.ReplayMemory(max_memory_size, min_memory_size)
    env.init()

    # Logging configuration and figure plotting
    logging.basicConfig(filename='../learning.log', filemode='w',
                        level=logging.DEBUG, format='%(levelname)s:%(message)s')
    logging.info('========================================================')
    logging.info('Training started for total training steps: '+str(num_steps_train_total)+'.\n')
    learning_rewards = [0]
    testing_rewards = [0]

    for epoch in range(1, num_epochs + 1):
        steps, num_episodes = 0, 0
        losses, rewards = [], []
        env.display_screen = False

        # training loop
        while steps < num_steps_train_epoch:
            episode_reward = 0.0
            my_agent.start_episode()

            while not env.game_over() and steps < num_steps_train_epoch:
                state = env.getGameState()
                reward, action = my_agent.act(state, epsilon=epsilon)
                memory.add([state, action, reward, env.game_over()])

                if steps % update_frequency == 0:
                    loss = memory.train_agent_batch(my_agent)

                    if loss is not None:
                        losses.append(loss)
                        epsilon = max(epsilon_min, epsilon - epsilon_rate)

                episode_reward += reward
                steps += 1

            if steps < num_steps_train_epoch:
                learning_rewards.append(episode_reward)

            if num_episodes % 5 == 0:
                # print "Episode {:01d}: Reward {:0.1f}".format(num_episodes, episode_reward)
                logging.info("Episode {:01d}: Reward {:0.1f}".format(num_episodes, episode_reward))

            rewards.append(episode_reward)
            num_episodes += 1
            my_agent.end_episode()

        logging.info("Train Epoch {:02d}: Epsilon {:0.4f} | Avg. Loss {:0.3f} | Avg. Reward {:0.3f}\n"
                     .format(epoch, epsilon, np.mean(losses), np.sum(rewards) / num_episodes))

        steps, num_episodes = 0, 0
        losses, rewards = [], []

        # testing loop
        while steps < num_steps_test:
            episode_reward = 0.0
            my_agent.start_episode()

            while not env.game_over() and steps < num_steps_test:
                state = env.getGameState()
                reward, action = my_agent.act(state, epsilon=0.05)

                episode_reward += reward
                testing_rewards.append(testing_rewards[-1]+reward)
                steps += 1

                # done watching after 500 steps.
                if steps > 500:
                    env.display_screen = False

            if num_episodes % 5 == 0:
                logging.info("Episode {:01d}: Reward {:0.1f}".format(num_episodes, episode_reward))

            if steps < num_steps_test:
                testing_rewards.append(episode_reward)

            rewards.append(episode_reward)
            num_episodes += 1
            my_agent.end_episode()

        logging.info("Test Epoch {:02d}: Best Reward {:0.3f} | Avg. Reward {:0.3f}\n"
                     .format(epoch, np.max(rewards), np.sum(rewards) / num_episodes))

    logging.info("Training complete.\n\n")
    plot_figure(fig_path, learning_rewards, 'reward', 'reward_in_training', num_steps_train_total)
    plot_figure(fig_path, testing_rewards, 'reward', 'reward_in_testing', num_steps_train_total)

    save_agent(my_agent, agent_file_path, agent_file_name)
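agent_training passes a process_state callable to PLE as its state_preprocessor, but it is not defined in these snippets. A minimal sketch, assuming the game state is the flat dict of numbers that PLE games return:

import numpy as np


def process_state(state):
    # Flatten PLE's game-state dict into a float vector; sorting the keys keeps the feature order stable.
    return np.array([state[key] for key in sorted(state)], dtype=np.float32)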
	"""
	def __init__(self, actions):
		self.actions = actions

	def pickAction(self, reward, obs):
		return self.actions[np.random.randint(0, len(self.actions))]

###################################
game = Doom(scenario="take_cover")

env = PLE(game)
agent = NaiveAgent(env.getActionSet())
env.init()

reward = 0.0
for f in range(15000):
    # if the game is over
    if env.game_over():
        env.reset_game()

    action = agent.pickAction(reward, env.getScreenRGB())
    reward = env.act(action)

    if f > 2000:
        env.display_screen = True
        env.force_fps = False

    if f > 2250:
        env.display_screen = True
        env.force_fps = True
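The Doom snippet above hands the raw RGB screen from env.getScreenRGB() to the agent; when learning from pixels, frames are usually grayscaled and downscaled first. A sketch of such a preprocessor (not part of the original code; the 84x84 output size is an arbitrary choice):

import numpy as np


def preprocess_screen(rgb_frame, out_height=84, out_width=84):
    # Grayscale via a luminance-weighted sum of the RGB channels.
    gray = np.asarray(rgb_frame, dtype=np.float32) @ np.array([0.299, 0.587, 0.114], dtype=np.float32)
    # Crude nearest-neighbour downscaling by picking evenly spaced rows/columns.
    rows = np.linspace(0, gray.shape[0] - 1, out_height).astype(int)
    cols = np.linspace(0, gray.shape[1] - 1, out_width).astype(int)
    return gray[np.ix_(rows, cols)] / 255.0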
Example 6

    # PLE takes our game and the state_preprocessor; it will preprocess the state for our agent.
    game = Catcher(width=128, height=128)
    env = PLE(game, fps=60, state_preprocessor=nv_state_preprocessor)

    agent = Agent(env, batch_size, num_frames, frame_skip, lr, 
            discount, rng, optimizer="sgd_nesterov")
    agent.build_model()

    memory = ReplayMemory(max_memory_size, min_memory_size)

    env.init()
    
    for epoch in range(1, num_epochs+1):
        steps, num_episodes = 0, 0
        losses, rewards = [], []
        env.display_screen = False
       
        #training loop
        while steps < num_steps_train:
            episode_reward = 0.0
            agent.start_episode()

            while not env.game_over() and steps < num_steps_train:
                state = env.getGameState()
                reward, action = agent.act(state, epsilon=epsilon)
                memory.add([state, action, reward, env.game_over()])

                if steps % update_frequency == 0:
                    loss = memory.train_agent_batch(agent)
                    
                    if loss is not None:
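In these DQN-style snippets, agent.act(state, epsilon=...) both chooses an action and applies it to the environment (the agent is constructed with env, and the caller reads reward, action back from the call). A hedged sketch of that interface (EpsilonGreedyAgent and q_function are hypothetical names; the real Agent class is not shown here):

import numpy as np


class EpsilonGreedyAgent:
    """Sketch of the act() interface the snippets assume: pick an action, apply it, return (reward, action)."""

    def __init__(self, env, q_function):
        self.env = env                      # a PLE instance
        self.actions = env.getActionSet()
        self.q_function = q_function        # callable: state -> array of Q-values, one per action

    def act(self, state, epsilon=0.0):
        if np.random.rand() < epsilon:      # explore
            action_idx = np.random.randint(len(self.actions))
        else:                               # exploit
            action_idx = int(np.argmax(self.q_function(state)))
        reward = self.env.act(self.actions[action_idx])
        return reward, action_idx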
Example 7
def run_game(nb_episodes, agent):
    reward_values = agent.reward_values()
    env = PLE(FlappyBird(),
              fps=30,
              display_screen=False,
              force_fps=True,
              rng=None,
              reward_values=reward_values)
    env.init()
    maxScore = 0  # high score
    score = 0  # current score
    test = 0  # number of test episodes remaining
    frames = 0  # frame counter
    acScore = 0  # accumulated score
    testAcScore = 0  # accumulated score during testing
    trainingEpisodes = 100  # number of episodes to train before each test phase
    testingEpisodes = 10  # number of testing episodes in each test phase
    avgScore = 0  # average score
    avgScoresArray = []  # average scores for the plot
    framesArray = []  # frame counts for the plot
    runs = nb_episodes  # initial episode count (assumed; used below to compute elapsed episodes)
    while nb_episodes > 0:
        action = 0
        # start by discretizing and calling the policy
        state = agent.discretize_state(env.game.getGameState())
        if test > 0:
            action = agent.policy(state)
        else:
            action = agent.training_policy(state)
        #Now we have a state action pair, we use the action to act on the environment
        reward = env.act(env.getActionSet()[action])

        #plotting
        if frames % 1000 == 0 and frames != 0:
            avgScore = acScore / (runs - nb_episodes + 1)
            avgScoresArray.append(avgScore)
            framesArray.append(frames)
            plt.plot(framesArray, avgScoresArray)
            plt.savefig(agent.filename)

        frames += 1
        if reward > 0:
            score += reward
            acScore += reward
            testAcScore += reward

        # this bird got far; let's watch it
        if score == 2000:
            env.display_screen = True
            env.force_fps = False
        # the bird is doing well; report every 1000 points to roughly track its progress
        if score % 1000 == 0 and score != 0:
            print('episode:', (runs - nb_episodes), 'Big score', score)
        statePrime = agent.discretize_state(env.game.getGameState())

        # don't update the agent while testing
        if test <= 0:
            agent.observe(state, action, reward, statePrime, env.game_over())

        if env.game_over():
            if (runs - nb_episodes) % trainingEpisodes == trainingEpisodes - 1:
                # uncomment to watch the agent during testing
                # env.display_screen = True
                # env.force_fps = False
                test = testingEpisodes
                print('State space:', len(agent.q))
                testAcScore = 0

                #decrease learning rate over time
                agent.learning_rate /= 2
            elif test > 0:
                test -= 1
            else:
                env.display_screen = False
                env.force_fps = True

            #New highscore
            if score > maxScore:
                maxScore = score
                print("Highscore:", maxScore)
            if test > 0:
                avgScore = testAcScore / ((testingEpisodes + 1) - test)
                print("Highscore:", maxScore, "Average:",
                      format(avgScore, '.3f'), "Frame:", frames, "Episode:",
                      runs - nb_episodes + 1, " Score:", score)
            if frames == 1000000:
                print("*" * 60)
                print("Frame limit reached")
                print("*" * 60)
            env.reset_game()
            nb_episodes -= 1
            score = 0
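run_game relies on agent.discretize_state(...) to map FlappyBird's continuous state dict onto a small key for the Q-table; it is not shown here. A sketch as a standalone function (the bin size and the chosen features are illustrative; the key names follow PLE's FlappyBird state dict):

def discretize_state(state, bin_size=15):
    """Map FlappyBird's continuous state onto a small tuple key for a Q-table (illustrative bins)."""
    dy = int((state["player_y"] - state["next_pipe_bottom_y"]) // bin_size)
    dx = int(state["next_pipe_dist_to_player"] // bin_size)
    vel = int(state["player_vel"])
    return (dy, dx, vel)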
Example 8

        if i.fitness > maior.fitness:
            maior = i

config = neat.Config(neat.DefaultGenome, neat.DefaultReproduction,
                     neat.DefaultSpeciesSet, neat.DefaultStagnation,
                     config_path)
maior = neat.nn.FeedForwardNetwork.create(maior, config)

Score = 0

app = QApplication([])
window = QMainWindow()
lcd = Ui_MainWindow()
lcd.setupUi(window)

while (True):
    State1 = ENV.game.getGameState()

    INP1 = State1["player_y"]
    INP2 = State1["next_pipe_bottom_y"]
    INP3 = (-State1["next_pipe_top_y"] + State1["next_pipe_bottom_y"]) / 2
    OUTPUT = maior.activate((INP1, INP2, INP3))

    VAL = 119 if OUTPUT[0] >= 0.4 else None  # 119 is pygame.K_w, the flap action in PLE's FlappyBird
    RESP = ENV.act(VAL)
    ENV.display_screen = True
    ENV.force_fps = False

    if RESP > 0:
        Score += 1
        lcd.Adicionar_Score(Score)
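The loop above only runs inference with the best evolved network (maior); the training side with neat-python typically evaluates each genome's fitness in the game. A sketch reusing the snippet's ENV and the same three inputs (the survival-time fitness is an assumed choice):

import neat


def eval_genomes(genomes, config):
    # Evaluate every genome for one episode; fitness = frames survived (an assumed choice).
    for genome_id, genome in genomes:
        net = neat.nn.FeedForwardNetwork.create(genome, config)
        ENV.reset_game()
        genome.fitness = 0.0
        while not ENV.game_over():
            s = ENV.game.getGameState()
            inputs = (s["player_y"],
                      s["next_pipe_bottom_y"],
                      (-s["next_pipe_top_y"] + s["next_pipe_bottom_y"]) / 2)
            out = net.activate(inputs)
            ENV.act(119 if out[0] >= 0.4 else None)
            genome.fitness += 1.0


# population = neat.Population(config)
# best_genome = population.run(eval_genomes, 50)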
Example 9
def agent_training(agent_file_path, agent_file_name, fig_path, num_steps_train_total = 5000):
    # training parameters
    num_epochs = 5
    num_steps_train_epoch = num_steps_train_total // num_epochs  # steps per epoch of training
    num_steps_test = 100
    update_frequency = 10  # step frequency of model training/updates

    epsilon = 0.15  # probability of taking a random action, to encourage exploration
    epsilon_steps = 1000  # decay steps
    epsilon_min = 0.1
    epsilon_rate = (epsilon - epsilon_min) / epsilon_steps

    # memory settings
    max_memory_size = 10000
    min_memory_size = 60  # number needed before model training starts

    game = FlappyBird()
    env = PLE(game, fps=30, display_screen=True, force_fps=True, state_preprocessor=process_state)
    my_agent = init_agent(env)

    memory = utils.ReplayMemory(max_memory_size, min_memory_size)
    env.init()

    # Logging configuration and figure plotting
    logging.basicConfig(filename='../learning.log', filemode='w',
                        level=logging.DEBUG, format='%(levelname)s:%(message)s')
    logging.info('========================================================')
    logging.info('Training started for total training steps: '+str(num_steps_train_total)+'.\n')
    learning_rewards = [0]
    testing_rewards = [0]

    for epoch in range(1, num_epochs + 1):
        steps, num_episodes = 0, 0
        losses, rewards = [], []
        env.display_screen = False

        # training loop
        while steps < num_steps_train_epoch:
            episode_reward = 0.0
            my_agent.start_episode()

            while not env.game_over() and steps < num_steps_train_epoch:
                state = env.getGameState()
                reward, action = my_agent.act(state, epsilon=epsilon)
                memory.add([state, action, reward, env.game_over()])

                if steps % update_frequency == 0:
                    loss = memory.train_agent_batch(my_agent)

                    if loss is not None:
                        losses.append(loss)
                        epsilon = max(epsilon_min, epsilon - epsilon_rate)

                episode_reward += reward
                steps += 1

            if steps < num_steps_train_epoch:
                learning_rewards.append(episode_reward)

            if num_episodes % 5 == 0:
                # print "Episode {:01d}: Reward {:0.1f}".format(num_episodes, episode_reward)
                logging.info("Episode {:01d}: Reward {:0.1f}".format(num_episodes, episode_reward))

            rewards.append(episode_reward)
            num_episodes += 1
            my_agent.end_episode()

        # print "Train Epoch {:02d}: Epsilon {:0.4f} | Avg. Loss {:0.3f} | Avg. Reward {:0.3f}\n"\
        #     .format(epoch, epsilon, np.mean(losses), np.sum(rewards) / num_episodes)
        logging.info("Train Epoch {:02d}: Epsilon {:0.4f} | Avg. Loss {:0.3f} | Avg. Reward {:0.3f}\n"
                     .format(epoch, epsilon, np.mean(losses), np.sum(rewards) / num_episodes))

        steps, num_episodes = 0, 0
        losses, rewards = [], []

        # display the screen
        # env.display_screen = True

        # slow it down so we can watch it fail!
        # env.force_fps = True

        # testing loop
        while steps < num_steps_test:
            episode_reward = 0.0
            my_agent.start_episode()

            while not env.game_over() and steps < num_steps_test:
                state = env.getGameState()
                reward, action = my_agent.act(state, epsilon=0.05)

                episode_reward += reward
                testing_rewards.append(testing_rewards[-1]+reward)
                steps += 1

                # done watching after 500 steps.
                if steps > 500:
                    env.display_screen = False

            if num_episodes % 5 == 0:
                # print "Episode {:01d}: Reward {:0.1f}".format(num_episodes, episode_reward)
                logging.info("Episode {:01d}: Reward {:0.1f}".format(num_episodes, episode_reward))

            if steps < num_steps_test:
                testing_rewards.append(episode_reward)

            rewards.append(episode_reward)
            num_episodes += 1
            my_agent.end_episode()

        # print "Test Epoch {:02d}: Best Reward {:0.3f} | Avg. Reward {:0.3f}\n"\
        #     .format(epoch, np.max(rewards), np.sum(rewards) / num_episodes)
        logging.info("Test Epoch {:02d}: Best Reward {:0.3f} | Avg. Reward {:0.3f}\n"
                     .format(epoch, np.max(rewards), np.sum(rewards) / num_episodes))

    logging.info("Training complete.\n\n")
    plot_figure(fig_path, learning_rewards, 'reward', 'reward_in_training', num_steps_train_total)
    plot_figure(fig_path, testing_rewards, 'reward', 'reward_in_testing', num_steps_train_total)

    save_agent(my_agent, agent_file_path, agent_file_name)
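agent_training also assumes helpers that are not shown in these snippets (init_agent, save_agent, plot_figure). Minimal sketches of the last two, with pickle-based persistence and matplotlib plotting as assumptions (the real implementations may differ):

import os
import pickle

import matplotlib.pyplot as plt


def save_agent(agent, agent_file_path, agent_file_name):
    # Illustrative: persist the trained agent with pickle (assumes the agent object is picklable).
    os.makedirs(agent_file_path, exist_ok=True)
    with open(os.path.join(agent_file_path, agent_file_name), "wb") as f:
        pickle.dump(agent, f)


def plot_figure(fig_path, values, ylabel, title, num_steps_train_total):
    # Illustrative: write the reward curve collected during training/testing to a PNG.
    plt.figure()
    plt.plot(values)
    plt.xlabel("episode")
    plt.ylabel(ylabel)
    plt.title("{} ({} training steps)".format(title, num_steps_train_total))
    plt.savefig(os.path.join(fig_path, title + ".png"))
    plt.close()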