Example #1
def test_single_training():

    numberOfCells = 10 # in each axis
    startingPosition = (4, 5) # head
    foodPosition = (3, 6)

    env = Environment(numberOfCells, deterministic=True)
    agent = DQNAgent(state_size=env.state_size, action_size=Actions.action_size, deterministic=True, batch_size=24, memory_limit=2000)
    state = env.reset(startingPosition, foodPosition)
    agent.reset_convolutional_layers()
    full_state = agent.get_convolutional_layers(state)
    loss10 = -1
    action10 = -1

    maxsteps = 10

    for step in range(maxsteps):
        action = agent.get_exploration_action()
        next_state, reward, done = env.step(action, food_position=(1, 1))
        assert(not done)
        full_next_state = agent.get_convolutional_layers(next_state)
        assert(full_next_state.shape == (1, numberOfCells, numberOfCells, agent.numberOfLayers))
        agent.save_transition(full_state, action, reward, full_next_state, done)
        current_loss = agent.train()
        full_state = full_next_state

    loss10 = current_loss
    action10 = action

    assert(loss10 == 0.006804642267525196)
    assert(action10 == 0)
Example #2
def test_smoke():
    # just runs the code - no assertions

    numberOfCells = 10 # in each axis
    startingPosition = (4, 5) # head
    foodPosition = (3, 6)

    env = Environment(numberOfCells)
    agent = DQNAgent(state_size=env.state_size, action_size=Actions.action_size, deterministic=True, batch_size=24, memory_limit=2000)
    state = env.reset(startingPosition, foodPosition)
    agent.reset_convolutional_layers()
    full_state = agent.get_convolutional_layers(state)

    maxsteps = 2

    for step in range(maxsteps):
        action = agent.get_exploration_action()
        next_state, reward, done = env.step(action, food_position=(1, 1))
        full_next_state = agent.get_convolutional_layers(next_state)
        assert(full_next_state.shape == (1, numberOfCells, numberOfCells, agent.numberOfLayers))
        agent.save_transition(full_state, action, reward, full_next_state, done)
        current_loss = agent.train()

        if (step == 0):
            action1 = action
            loss1 = current_loss

        full_state = full_next_state

    loss2 = current_loss
    action2 = action
Example #3
def test_multiepisode_training():

    numberOfCells = 10 # in each axis
    startingPosition = (4, 5) # head
    foodPosition = (3, 6)

    env = Environment(numberOfCells, deterministic=True)
    state_size = env.state_size
    action_size = Actions.action_size # 3
    agent = DQNAgent(state_size=state_size, action_size=action_size, deterministic=True, batch_size=24, memory_limit=2000)

    losses = [-1, -1, -1, -1]
    done = False

    episodes = 4
    maxsteps = 9

    for e in range(episodes):

        state = env.reset(startingPosition, foodPosition)
        agent.reset_convolutional_layers()
        full_state = agent.get_convolutional_layers(state)
        loss = 0

        for step in range(maxsteps):
            action = agent.get_exploration_action()
            next_state, reward, done = env.step(action, food_position=(1, 1)) # food generation at (1, 1) happens only once over the whole test
            full_next_state = agent.get_convolutional_layers(next_state)
            assert(full_next_state.shape == (1, numberOfCells, numberOfCells, agent.numberOfLayers))
            agent.save_transition(full_state, action, reward, full_next_state, done)

            current_loss = agent.train()
            loss += current_loss

            full_state = full_next_state

        losses[e] = loss

    assert(losses[0] == 3.9618697417899966)
    assert(losses[1] == 0.044194952584803104)
    assert(losses[2] == 0.1333141174982302)
    assert(losses[3] == 2.834151452407241)
Example #4
# We can use proportional or rank-based prioritized replay (proportional seems to be preferred in many papers)
# Simple, non-prioritized replay is also implemented

alpha_scheduler = dqn.annealing_schedules.Constant(0.7)
beta_scheduler = dqn.annealing_schedules.Constant(0.5)
memory = dqn.experience_replay.Proportional(capacity=50000,
                                            alpha_scheduler=alpha_scheduler,
                                            beta_scheduler=beta_scheduler)
##memory = dqn.experience_replay.RankBased(capacity=50000, alpha_scheduler=alpha_scheduler, beta_scheduler=beta_scheduler)
##memory = dqn.experience_replay.Simple(capacity=50000)

# Below we add n-step learning via the n_step parameter
# Frame skipping is not yet supported; it will be added in the future
agent = DQNAgent(network=q_func,
                 observation_space=env.observation_space,
                 action_space=env.action_space,
                 action_selection=action_selection,
                 loss=loss,
                 update_target=update_target,
                 memory=memory,
                 n_step=3,
                 update_target_network_frequency=2000)

agent.train(env, num_timesteps=num_steps, render=False)

# We can save and load an agent
# Note: Currently this only saves the weights of the network -- the entire agent must be recreated (or reused, as would happen here) before calling load
##agent.save('/tmp/save_test/test')
##agent.load('/tmp/save_test/test')
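
# A minimal sketch of the recreate-then-load flow the note above describes: since
# save() only stores the network weights, an agent built with the same constructor
# arguments must already exist before load() is called. The restored_agent name and
# the reuse of the /tmp path are illustrative, not part of the original example.
agent.save('/tmp/save_test/test')
restored_agent = DQNAgent(network=q_func,
                          observation_space=env.observation_space,
                          action_space=env.action_space,
                          action_selection=action_selection,
                          loss=loss,
                          update_target=update_target,
                          memory=memory,
                          n_step=3,
                          update_target_network_frequency=2000)
restored_agent.load('/tmp/save_test/test')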
Example #5
def train_snake():

    # todo: move all these parameters into a configuration file
    numberOfCells = 10  # in each axis
    startingPosition = (4, 5)  # head
    foodPosition = (3, 6)
    max_steps_allowed = 1000

    env = Environment(numberOfCells)
    state_size = env.state_size  #(numberOfCells x numberOfCells)
    action_size = Actions.action_size  # 3
    agent = DQNAgent(state_size=state_size,
                     action_size=action_size,
                     batch_size=32,
                     memory_limit=6000,
                     number_of_channels=5)

    episodes = 30000
    decay = 0.9 / episodes * 2  # per-episode epsilon decay: shifts the agent from exploring to exploiting
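    # With episodes = 30000 this removes 0.9 of epsilon over the first ~15000 episodes,
    # so exploration reaches the 0.1 floor (enforced at the end of each episode below)
    # about halfway through training, assuming the agent starts with epsilon close to 1.0.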

    epochs = []
    losses = []
    steps_list = []

    with open('training_data', 'w') as f:

        for e in range(episodes):

            state = env.reset(startingPosition)
            #print('state array reset: \n', state)

            agent.reset_convolutional_layers()
            full_state = agent.get_convolutional_layers(state)
            loss = 0.0
            steps = 0
            done = False
            episode_reward = 0

            while not done:

                # state at this point is just a 2D array
                action = agent.get_action(full_state)
                #action = agent.get_raction()
                #print('action chosen: ', action)

                # step onto the next state
                next_state, reward, done = env.step(action)

                #print('state array after step ', steps, ' : \n', next_state)
                #print('reward returned: ', reward)
                #print('next state: ', next_state)

                # we store the next_state in (1,H,W,C)
                full_next_state = agent.get_convolutional_layers(next_state)
                #print('full next state: \n:', full_next_state)
                #assert(full_next_state.shape == (1, numberOfCells, numberOfCells, agent.numberOfLayers))

                # save S,A,R,S' to experience
                # full states are a snapshot - copies of the state
                agent.save_transition(full_state, action, reward,
                                      full_next_state, done)
                episode_reward += reward

                # train the model from stored experience only (off-policy: independent of how the action above was chosen)
                current_loss = agent.train()
                #print('current_loss: ', current_loss)
                loss += current_loss
                full_state = full_next_state

                # limit max steps per episode to avoid endless games
                steps += 1
                if steps >= max_steps_allowed:
                    done = True

            # next episode
            if agent.epsilon > 0.1:
                agent.epsilon -= decay  # agent slowly reduces exploring

            print(
                'episode: {:5d} steps: {:3d} epsilon: {:.3f} loss: {:8.4f} reward: {:3d} fruits: {:2d}'
                .format(e, steps, agent.epsilon, loss, episode_reward,
                        env.fruits_eaten))
            f.write('{:5d} {:3d} {:8.4f} {:4d} {:2d}\n'.format(
                e, steps, loss, episode_reward, env.fruits_eaten))

        agent.model.save('trained_snake.model')
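
# A small optional sketch for inspecting the 'training_data' log written above.
# It assumes matplotlib is installed; the column order matches the f.write() call:
# episode, steps, loss, episode_reward, fruits_eaten.
def plot_training_data(path='training_data'):
    import matplotlib.pyplot as plt

    episodes, steps, losses, rewards, fruits = [], [], [], [], []
    with open(path) as f:
        for line in f:
            e, s, loss, reward, eaten = line.split()
            episodes.append(int(e))
            steps.append(int(s))
            losses.append(float(loss))
            rewards.append(int(reward))
            fruits.append(int(eaten))

    fig, (ax_top, ax_bottom) = plt.subplots(2, 1, sharex=True)
    ax_top.plot(episodes, rewards, label='episode reward')
    ax_top.plot(episodes, fruits, label='fruits eaten')
    ax_top.legend()
    ax_bottom.plot(episodes, losses, label='summed loss per episode')
    ax_bottom.set_xlabel('episode')
    ax_bottom.legend()
    plt.show()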
Example #6
import argparse

parser = argparse.ArgumentParser()
# note: the "-m"/"--model" flag names are assumed from the args.model usage below;
# the original snippet begins part-way through this first add_argument call
parser.add_argument(
    "-m",
    "--model",
    type=str,
    action='store',
    help="Please specify the agent you wish to use, either DQN or A3C",
    required=True)
parser.add_argument(
    "-n",
    "--mode",
    type=str,
    action='store',
    help="Please specify the mode you wish to run, either train or eval",
    required=True)

args = parser.parse_args()
print(args)

if args.model == 'DQN':
    agent = DQNAgent()

    if args.mode == 'train':
        agent.train()

    if args.mode == 'eval':
        agent.Evaluate()

if args.model == 'A3C':
    agent = A3CGlobalAgent()

    if args.mode == 'train':
        agent.train()

    if args.mode == 'eval':
        agent.Evaluate()
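
# Example invocations (the script name is assumed; the flags follow the arguments defined above):
#   python main.py --model DQN --mode train
#   python main.py --model A3C --mode eval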