Example #1
import numpy as np
from ple import PLE
from ple.games.snake import Snake
import random


def getReward(env, state, action):
    # Act in the environment; env.act() returns the reward for that action.
    return env.act(action)

agent = Snake(width=360, height=360)
Q = {}          # Q-table: (state, action) -> estimated value
gamma = 0.9     # discount factor
alpha = 0.1     # learning rate
explore = 0.75  # exploration rate, decayed during training

env = PLE(agent, fps=15, force_fps=False, display_screen=True)

env.init()

for i in range(100000):

    # Decay the exploration rate every 30000 iterations, stopping at 0.25.
    if explore != 0.25 and i % 30000 == 0:
        explore -= 0.25

    if env.game_over():
        env.reset_game()

    state = env.getGameState()
    
    # Drop the body positions so the state space stays small.
    del state["snake_body_pos"]
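
# The example breaks off before the learning step. A minimal sketch of the
# tabular Q-learning pieces it sets up -- these helpers are assumptions, not
# part of the original, and `s` must be hashable (e.g. the state dict frozen
# as tuple(sorted(state.items()))):

def choose_action(Q, s, actions, explore):
    # Epsilon-greedy: random action with probability `explore`, else greedy.
    if random.random() < explore:
        return random.choice(actions)
    return max(actions, key=lambda a: Q.get((s, a), 0.0))

def q_update(Q, s, a, reward, s_next, actions, alpha=0.1, gamma=0.9):
    # Q(s,a) <- Q(s,a) + alpha * (reward + gamma * max_a' Q(s',a') - Q(s,a))
    best_next = max(Q.get((s_next, a2), 0.0) for a2 in actions)
    old = Q.get((s, a), 0.0)
    Q[(s, a)] = old + alpha * (reward + gamma * best_next - old)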
Example #2

from ple import PLE
from ple.games.snake import Snake


def get_grille(game, snake_location, food_location):
    # Build a coarse occupancy grid marking the snake head and the food.
    s_x, s_y = snake_location
    f_x, f_y = food_location
    grille = [[0] * int(game.width / 10) for i in range(int(game.height / 10))]
    grille[s_x][s_y] += 1
    grille[f_x][f_y] += 1
    return grille


# DO NOT CHANGE THIS VARIABLE
case_size = 20
size = 10

# Initialize the game
game = Snake(height=case_size * size, width=case_size * size)
p = PLE(game, fps=30, display_screen=True)

# Trainer is the example's agent class; its definition is not part of this
# snippet.
agent = Trainer(allowed_actions=p.getActionSet(),
                height=game.height,
                width=game.width)

p.init()
reward = 0.0
nb_frames = 10000000000000000  # effectively run forever
bestScore = 0

for i in range(nb_frames):

    if p.score() > bestScore:
        bestScore = int(p.score())
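
    # The loop is cut off here. One plausible next step, assuming PLE's
    # standard Snake state keys and the factor-of-10 scaling used inside
    # get_grille (indices may need clamping at the borders):
    state = p.getGameState()
    grille = get_grille(game,
                        (int(state["snake_head_x"]) // 10,
                         int(state["snake_head_y"]) // 10),
                        (int(state["food_x"]) // 10,
                         int(state["food_y"]) // 10))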
Example #3
import numpy as np
from ple import PLE
from ple.games.snake import Snake


def round_state(state):
    # Bucket the pixel coordinates into 60px cells so similar positions share
    # a Q-table entry; the variable-length body lists are dropped. Integer
    # division keeps the buckets discrete in Python 3.
    newState = {}
    for i in state:
        if i != 'snake_body' and i != 'snake_body_pos':
            newState[i] = int(state[i]) // 60
    return newState


agent = Snake(width=256, height=256)

env = PLE(agent, fps=15, force_fps=False, display_screen=True)

env.init()

actions = env.getActionSet()

q_table = {}
alpha = 0.1
gamma = 0.9

while True:

    print(q_table)

    old_game_state = round_state(agent.getGameState())
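
    # The loop is cut off here. A sketch of how it might continue: dicts
    # cannot key q_table directly, so the rounded state is frozen into a tuple
    # first; the greedy action choice and update below are standard Q-learning
    # steps, not from the original.
    key = tuple(sorted(old_game_state.items()))
    action = max(actions, key=lambda a: q_table.get((key, a), 0.0))
    reward = env.act(action)
    new_key = tuple(sorted(round_state(agent.getGameState()).items()))
    best_next = max(q_table.get((new_key, a), 0.0) for a in actions)
    old = q_table.get((key, action), 0.0)
    q_table[(key, action)] = old + alpha * (reward + gamma * best_next - old)
    if env.game_over():
        env.reset_game()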
Example #4
def test_snake(self):
    from ple.games.snake import Snake
    game = Snake()
    self.run_a_game(game)
Example #5
        fy = state['food_y']
        # Feed the snake/food coordinates to the genome's network and take the
        # action whose output is strongest.
        turns = individual.activate([sx, sy, fx, fy])
        best_turn = max(turns)
        if turns[0] == best_turn:
            fitness += snake_game.act(UP)
        elif turns[1] == best_turn:
            fitness += snake_game.act(LEFT)
        elif turns[2] == best_turn:
            fitness += snake_game.act(RIGHT)
        else:
            fitness += snake_game.act(DOWN)
    return fitness

if __name__ == '__main__':
    # set up the Snake game
    game = Snake(width=400, height=400)
    # NOTE: if training, set force_fps=True; if testing, set force_fps=False
    snake_game = PLE(game, fps=30, display_screen=True, force_fps=False)
    snake_game.init()

    # uncomment this block to train a solution
    #model = neat.NEAT(config_file="snake.config")
    #best_genome = model.run(fitness_function=test_snake)
    #pickle.dump( best_genome, open( "snek", "wb" ) )

    # uncomment this block to test solution
    # Key codes for PLE's Snake actions (pygame: K_w=119 is up, K_a=97 left,
    # K_d=100 right, K_s=115 down).
    UP = 119
    LEFT = 97
    RIGHT = 100
    DOWN = 115
    individual = pickle.load(open("snek", "rb"))
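
    # The test loop itself is missing from the snippet. A hedged sketch of how
    # the loaded genome might drive the game, assuming PLE's standard Snake
    # state keys and reusing the output-index-to-action mapping from the
    # fitness function above:
    while not snake_game.game_over():
        state = snake_game.getGameState()
        turns = individual.activate([state['snake_head_x'],
                                     state['snake_head_y'],
                                     state['food_x'], state['food_y']])
        action = [UP, LEFT, RIGHT, DOWN][turns.index(max(turns))]
        snake_game.act(action)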
Example #6

import torch

from ple import PLE
from ple.games.snake import Snake


def discounted_rewards(rewards, gamma=0.99):
    # Walk the episode backwards: G_t = r_t + gamma * G_{t+1}.
    res = []
    for r in reversed(rewards):
        cum_reward = res[0] if res else 0
        res.insert(0, gamma * cum_reward + r)

    return res
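
# For instance, a single terminal reward is propagated backwards (illustrative,
# not in the original): discounted_rewards([0, 0, 1], gamma=0.9) gives
# approximately [0.81, 0.9, 1.0].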


def train(env, agent):
    # REINFORCE-style update: scale each action's negative log-probability by
    # its discounted return, so actions that led to reward become more likely.
    optimizer = torch.optim.Adam(agent.parameters())

    while True:
        agent.zero_grad()
        p, r = play_episode(env, agent)  # log-probs of chosen actions, rewards
        r = torch.tensor(discounted_rewards(r), device=agent.device)
        loss = -r * p
        loss = loss.mean()
        loss.backward()
        optimizer.step()


if __name__ == '__main__':
    env = PLE(Snake(), fps=30, display_screen=True)
    env.init()
    # Agent (a policy network) is defined elsewhere in the original source.
    agent = Agent(env.getScreenDims(), 16, env.getActionSet())

    train(env, agent)
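
# play_episode is also not part of the snippet. A minimal sketch of what
# train() expects back from it -- a tensor of log-probabilities for the
# sampled actions plus the list of per-step rewards. The screen preprocessing
# and the Agent call signature here are assumptions:
def play_episode(env, agent, max_steps=1000):
    env.reset_game()
    actions = env.getActionSet()
    log_probs, rewards = [], []
    for _ in range(max_steps):
        screen = torch.tensor(env.getScreenGrayscale(),
                              dtype=torch.float32, device=agent.device)
        probs = agent(screen)  # assumed: a probability over the action set
        dist = torch.distributions.Categorical(probs)
        a = dist.sample()
        log_probs.append(dist.log_prob(a))
        rewards.append(env.act(actions[a.item()]))
        if env.game_over():
            break
    return torch.stack(log_probs), rewards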