Example no. 1
# imports are omitted in this excerpt; module paths assumed, e.g.:
#   from environment import Environment
#   from agent import DQNAgent, Actions
def test_smoke():
    # just runs the code - no assertions on the results, only a shape check

    numberOfCells = 10 # in each axis
    startingPosition = (4, 5) # head
    foodPosition = (3, 6)

    env = Environment(numberOfCells)
    agent = DQNAgent(state_size=env.state_size,
                     action_size=Actions.action_size,
                     deterministic=True,
                     batch_size=24,
                     memory_limit=2000)
    state = env.reset(startingPosition, foodPosition)
    agent.reset_convolutional_layers()
    full_state = agent.get_convolutional_layers(state)
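    # the agent stacks the grid into a (1, H, W, C) tensor with C = agent.numberOfLayers channels (see the shape assert below)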

    maxsteps = 2

    for step in range(maxsteps):
        action = agent.get_exploration_action()
        next_state, reward, done = env.step(action, food_position=(1, 1))
        full_next_state = agent.get_convolutional_layers(next_state)
        assert(full_next_state.shape == (1, numberOfCells, numberOfCells, agent.numberOfLayers))
        agent.save_transition(full_state, action, reward, full_next_state, done)
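        # train() presumably samples a minibatch (batch_size=24) from the replay memory and returns the step loss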
        current_loss = agent.train()

        if step == 0:
            action1 = action  # first-step values, not asserted in this smoke test
            loss1 = current_loss

        full_state = full_next_state

    loss2 = current_loss  # final-step values, likewise not asserted
    action2 = action
Example no. 2
def test_single_training():

    numberOfCells = 10 # in each axis
    startingPosition = (4, 5) # head
    foodPosition = (3, 6)

    env = Environment(numberOfCells, deterministic=True)
    agent = DQNAgent(state_size=env.state_size,
                     action_size=Actions.action_size,
                     deterministic=True,
                     batch_size=24,
                     memory_limit=2000)
    state = env.reset(startingPosition, foodPosition)
    agent.reset_convolutional_layers()
    full_state = agent.get_convolutional_layers(state)
    loss10 = -1
    action10 = -1

    maxsteps = 10

    for step in range(maxsteps):
        action = agent.get_exploration_action()
        next_state, reward, done = env.step(action, food_position=(1, 1))
        assert(not done)
        full_next_state = agent.get_convolutional_layers(next_state)
        assert(full_next_state.shape == (1, numberOfCells, numberOfCells, agent.numberOfLayers))
        agent.save_transition(full_state, action, reward, full_next_state, done)
        current_loss = agent.train()
        full_state = full_next_state

    loss10 = current_loss
    action10 = action

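    # exact float comparisons only make sense because both env and agent were created with deterministic=True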
    assert(loss10 == 0.006804642267525196)
    assert(action10 == 0)
Example no. 3
def test_multiepisode_training():

    numberOfCells = 10 # in each axis
    startingPosition = (4, 5) # head
    foodPosition = (3, 6)

    env = Environment(numberOfCells, deterministic=True)
    state_size = env.state_size
    action_size = Actions.action_size # 3
    agent = DQNAgent(state_size=state_size,
                     action_size=action_size,
                     deterministic=True,
                     batch_size=24,
                     memory_limit=2000)

    losses = [-1, -1, -1, -1]
    done = False

    episodes = 4
    maxsteps = 9

    for e in range(episodes):

        state = env.reset(startingPosition, foodPosition)
        agent.reset_convolutional_layers()
        full_state = agent.get_convolutional_layers(state)
        loss = 0

        for step in range(maxsteps):
            action = agent.get_exploration_action()
            next_state, reward, done = env.step(action, food_position=(1, 1))  # food respawns at (1, 1); this happens only once over the whole test
            full_next_state = agent.get_convolutional_layers(next_state)
            assert(full_next_state.shape == (1, numberOfCells, numberOfCells, agent.numberOfLayers))
            agent.save_transition(full_state, action, reward, full_next_state, done)

            current_loss = agent.train()
            loss += current_loss

            full_state = full_next_state

        losses[e] = loss

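    # the replay memory keeps filling up across episodes (the agent is never reset), so each episode trains on
    # different minibatches; the exact values again rely on the deterministic=True flags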
    assert(losses[0] == 3.9618697417899966)
    assert(losses[1] == 0.044194952584803104)
    assert(losses[2] == 0.1333141174982302)
    assert(losses[3] == 2.834151452407241)
Example no. 4
def train_snake():

    # TODO: move all these parameters into a configuration file
    numberOfCells = 10  # in each axis
    startingPosition = (4, 5)  # head
    foodPosition = (3, 6)
    max_steps_allowed = 1000

    env = Environment(numberOfCells)
    state_size = env.state_size  #(numberOfCells x numberOfCells)
    action_size = Actions.action_size  # 3
    agent = DQNAgent(state_size=state_size,
                     action_size=action_size,
                     batch_size=32,
                     memory_limit=6000,
                     number_of_channels=5)
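    # number_of_channels=5: the agent presumably keeps the last 5 grid frames as input channels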

    episodes = 30000
    decay = 0.9 / episodes * 2  # linear epsilon decay; assuming epsilon starts at 1.0, it reaches the 0.1 floor about halfway through training

    epochs = []
    losses = []
    steps_list = []

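    # one whitespace-separated line is written per episode: episode, steps, summed loss, reward, fruits eaten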
    with open('training_data', 'w') as f:

        for e in range(episodes):

            state = env.reset(startingPosition)  # note: foodPosition defined above is not passed here
            #print('state array reset: \n', state)

            agent.reset_convolutional_layers()
            full_state = agent.get_convolutional_layers(state)
            loss = 0.0
            steps = 0
            done = False
            episode_reward = 0

            while not done:

                # the raw state is just a 2D grid; the agent picks an action from the stacked full_state tensor
                action = agent.get_action(full_state)
                #action = agent.get_raction()
                #print('action chosen: ', action)

                # step onto the next state
                next_state, reward, done = env.step(action)

                #print('state array after step ', steps, ' : \n', next_state)
                #print('reward returned: ', reward)
                #print('next state: ', next_state)

                # we store the next_state in (1,H,W,C)
                full_next_state = agent.get_convolutional_layers(next_state)
                #print('full next state: \n:', full_next_state)
                #assert(full_next_state.shape == (1, numberOfCells, numberOfCells, agent.numberOfLayers))

                # save the (S, A, R, S', done) transition to the experience replay memory;
                # the full states are snapshots (copies), not references to the live state
                agent.save_transition(full_state, action, reward,
                                      full_next_state, done)
                episode_reward += reward

                # off-policy training step: the model learns only from transitions sampled out of the replay memory
                current_loss = agent.train()
                #print('current_loss: ', current_loss)
                loss += current_loss
                full_state = full_next_state

                # cap the episode length so a stuck or looping agent cannot run forever
                steps += 1
                if steps >= max_steps_allowed:
                    done = True

            # episode finished: slowly shift from exploration towards exploitation
            if agent.epsilon > 0.1:
                agent.epsilon -= decay

            print(
                'episode: {:5d} steps: {:3d} epsilon: {:.3f} loss: {:8.4f} reward: {:3d} fruits: {:2d}'
                .format(e, steps, agent.epsilon, loss, episode_reward,
                        env.fruits_eaten))
            f.write('{:5d} {:3d} {:8.4f} {:4d} {:2d}\n'.format(
                e, steps, loss, episode_reward, env.fruits_eaten))

        agent.model.save('trained_snake.model')
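
The loop above writes one whitespace-separated line per episode to training_data (episode index, steps, summed loss, episode reward, fruits eaten). Below is a minimal sketch, not part of the original code, for inspecting that log once training has run; it assumes numpy and matplotlib are installed and takes the column order from the f.write format string above.

import numpy as np
import matplotlib.pyplot as plt

# columns match the f.write call in train_snake: episode, steps, loss, reward, fruits
log = np.loadtxt('training_data')
episodes, steps, losses, rewards, fruits = log.T

fig, (ax_loss, ax_fruit) = plt.subplots(2, 1, sharex=True)
ax_loss.plot(episodes, losses)
ax_loss.set_ylabel('loss per episode')
ax_fruit.plot(episodes, fruits)
ax_fruit.set_ylabel('fruits eaten')
ax_fruit.set_xlabel('episode')
plt.show()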