Example 1
# Create environment
game_wrapper = GameWrapper(MAX_NOOP_STEPS)
print("The environment has the following {} actions: {}".format(
    game_wrapper.env.action_space.n,
    game_wrapper.env.unwrapped.get_action_meanings()))

# Create agent
MAIN_DQN = build_q_network(LEARNING_RATE, input_shape=INPUT_SHAPE)
TARGET_DQN = build_q_network(input_shape=INPUT_SHAPE)

replay_buffer = ReplayBuffer(size=MEM_SIZE, input_shape=INPUT_SHAPE)
agent = Agent(MAIN_DQN, TARGET_DQN, replay_buffer, input_shape=INPUT_SHAPE)

print('Loading model...')
# We only want to load the replay buffer when resuming training
agent.load('./saved_models/save-02502048/', load_replay_buffer=False)
print('Loaded.')

terminal = True
eval_rewards = []
evaluate_frame_number = 0

for frame in range(EVAL_LENGTH):
    if terminal:
        game_wrapper.reset(evaluation=True)
        life_lost = True
        episode_reward_sum = 0
        terminal = False

    # Breakout requires a "fire" action (action #1) to start the
    # game each time a life is lost.
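The first example is truncated at this point. As a rough sketch, the loop body might continue along the following lines, pressing fire after a life is lost and otherwise letting the agent act greedily; the exact agent.get_action and game_wrapper.step signatures are assumptions, not taken from this excerpt.

    # Sketch of a plausible continuation (signatures are assumptions):
    # press fire after a life is lost, otherwise act greedily
    action = 1 if life_lost else agent.get_action(frame, game_wrapper.state,
                                                  evaluation=True)

    # Step the environment and accumulate the episode reward
    _, reward, terminal, life_lost = game_wrapper.step(action)
    evaluate_frame_number += 1
    episode_reward_sum += reward

    # Record the episode's total reward once the game ends
    if terminal:
        eval_rewards.append(episode_reward_sum)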
Example 2
if LOAD_FROM is None:
    # Start training from scratch
    frame_number = 0
    rewards = []
    loss_list = []

    # Build main and target networks
    MAIN_DQN = build_q_network(LEARNING_RATE, input_shape=INPUT_SHAPE)
    TARGET_DQN = build_q_network(input_shape=INPUT_SHAPE)

    replay_buffer = ReplayBuffer(size=MEM_SIZE, input_shape=INPUT_SHAPE)
    agent = Agent(MAIN_DQN,
                  TARGET_DQN,
                  replay_buffer,
                  input_shape=INPUT_SHAPE,
                  batch_size=BATCH_SIZE)
else:
    # TODO: LOADING IS A LITTLE BROKEN AT THE MOMENT!
    # Load the agent instead
    print('Loading from', LOAD_FROM)
    meta = agent.load(LOAD_FROM, LOAD_REPLAY_BUFFER)

    # Apply information loaded from meta
    frame_number = meta['frame_number']
    rewards = meta['rewards']
    loss_list = meta['loss_list']

    print('Loaded')
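
    # Note: for this load to find a checkpoint, the training loop below
    # presumably saves one with a matching call; something along these
    # lines (signature is an assumption, inferred from the meta keys
    # read above, not from this excerpt):
    #   agent.save(save_path, frame_number=frame_number,
    #              rewards=rewards, loss_list=loss_list)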

# FULL TRAINING LOOP
try:
    # Allows us to write to TensorBoard
    with writer.as_default():
        while frame_number < TOTAL_FRAMES:
            epoch_frame = 0
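
The excerpt ends just as the loop begins. For context, here is a minimal sketch of how the body of such a DQN training loop typically proceeds: reset, act epsilon-greedily, store the transition, learn from minibatches, and periodically sync the target network. The constants (MAX_EPISODE_LENGTH, UPDATE_FREQ, TARGET_UPDATE_FREQ, MIN_REPLAY_BUFFER_SIZE, DISCOUNT_FACTOR) and the agent methods (add_experience, learn, update_target_network) are assumptions, not taken from this excerpt.

            # Hypothetical continuation (constants and agent methods named
            # here are assumptions, not from this excerpt)
            game_wrapper.reset()
            life_lost = True
            episode_reward_sum = 0

            for _ in range(MAX_EPISODE_LENGTH):
                # Epsilon-greedy action from the main network
                action = agent.get_action(frame_number, game_wrapper.state)

                # Step the environment and store the transition
                processed_frame, reward, terminal, life_lost = \
                    game_wrapper.step(action)
                frame_number += 1
                epoch_frame += 1
                episode_reward_sum += reward
                agent.add_experience(action=action,
                                     frame=processed_frame,
                                     reward=reward,
                                     terminal=life_lost)

                # Learn from a sampled minibatch every few frames, once
                # the replay buffer holds enough transitions
                if frame_number % UPDATE_FREQ == 0 and \
                        agent.replay_buffer.count > MIN_REPLAY_BUFFER_SIZE:
                    loss, _ = agent.learn(BATCH_SIZE,
                                          gamma=DISCOUNT_FACTOR,
                                          frame_number=frame_number)
                    loss_list.append(loss)

                # Periodically copy main-network weights to the target network
                if frame_number % TARGET_UPDATE_FREQ == 0 and frame_number > 0:
                    agent.update_target_network()

                if terminal:
                    rewards.append(episode_reward_sum)
                    break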