Example #1
0
def step(snakes: list, food: Food, action, obs_type: str = "Grid"):
    """Advance the game by one tick for the player snake.

    Args:
        snakes: All snakes in the game; the first element must be the player.
        food: The Food object currently on the grid.
        action: The player's move for this tick (expected to be 0, 1, 2 or 3).
        obs_type: Observation format — "Grid" returns an OBS_GRID_SIZE view
            around the player's head via getObsGrid; "Small" returns the
            compact observation via getObsSmall. Any other value yields the
            sentinel string "Wrong Input".

    Returns:
        A Gym-style (obs, reward, done, info) tuple. `done` is always False
        here (episode termination is handled by the caller), and `info` is
        an empty string.
    """
    # First snake must be the player.
    player = snakes[0]
    player.action([action], "AC")
    done = False
    # Sentinel returned when obs_type matches no known observation format.
    obs = "Wrong Input"
    # Overall reward increases every time food is eaten.
    handleFoodEating(snakes, food)
    # Build the post-move observation. Currently either a partial grid
    # centered on the player's head, or the compact "Small" encoding.
    if obs_type == "Grid":
        obs = getObsGrid(snakes, food, OBS_GRID_SIZE, fullGrid=False)
    if obs_type == "Small":
        obs = getObsSmall(snakes, food)
    info = ""

    return obs, player.reward, done, info
Example #2
0
        return obs, player.reward, done, info


    score_history = []
    score = 0
    n_steps_history = []
    for i in tqdm(range(num_episodes)):
        player = Snake(0)
        enemy = Snake(1)
        enemy.positions = [(random.randint(0, SIZE - 1), random.randint(0, SIZE - 1))]
        food = Food([player])
        done = False
        score = 0
        # returns a numpy array of the state we care about
        observation = getObsGrid(snakes=[player], food=food, size=OBS_GRID_SIZE, fullGrid=False)
        #observation = getObsSmall([player, enemy], food)
        n_steps = 0
        while not done and n_steps < 100:
            n_steps += 1
            # action needs to be either 0,1,2 or 3
            action = agentAC.choose_action(observation)
            observation_, reward, done, info = step(snakes=[player, enemy], food=food, action=action, obs_type=obs_type)
            agentAC.learn(observation, reward, observation_, done) # For Actor-Critic
            #agent.store_rewards(reward) # For REINFORCE
            observation = observation_
            score += reward
        score_history.append(score)
        n_steps_history.append(n_steps)
        if i % SHOW_EVERY == 0:
            #print(f"on #{i}, epsilon is {lr}")