Esempio n. 1
0
# Kick off the game with action 1.
state1, reward, done, info = env.step(1)

# Repeat action 3 ten times (per the original note: the base stays on the left
# and only the ball moves). state1 keeps the state returned by the last step.
for i in range(10):
    state1, reward, done, info = env.step(3)
# One further step: the ball has moved one step with respect to state1.
state2, reward, done, info = env.step(3)

# Same again: 19 more steps of action 3, keeping only the last returned state.
for i in range(19):
    state3, reward, done, info = env.step(3)
# One further step with respect to state3; per the original note this is the
# step on which the ball breaks the wall.
state4, reward, done, info = env.step(3)
# NOTE(review): state2, state3 and state4 are not read anywhere in this
# excerpt -- presumably they are used further down; confirm.


def _autoencoder_outputs(state):
    """Run one raw state through the autoencoder calls used below.

    The state is cropped with ``crop_state``, converted to an array and given
    a leading axis of size 1; that batch is then passed to ``AE.encode``,
    ``AE.flat_encode`` and ``AE.predict``, in that order.

    Returns the tuple ``(preprocessed, encoded, flat_encoded, predicted)``.
    """
    batch = np.expand_dims(np.asarray(crop_state(state)), axis=0)
    return batch, AE.encode(batch), AE.flat_encode(batch), AE.predict(batch)


# NOTE(review): the "state0" outputs are computed from state1, so they repeat
# the "state1" outputs further down -- confirm this is intended.
(preprocessed_state0, encoded_state0,
 flat_encoded_state0, predicted_state0) = _autoencoder_outputs(state1)
predicted_state0 = predicted_state0.reshape(4, 84, 84)
conv_map0 = flat_encoded_state0.reshape(7, 7)

# NOTE(review): state00 is not assigned anywhere in this excerpt; it must be
# defined earlier in the file for this to run -- confirm.
(preprocessed_state00, encoded_state00,
 flat_encoded_state00, predicted_state00) = _autoencoder_outputs(state00)
predicted_state00 = predicted_state00.reshape(4, 84, 84)
conv_map00 = flat_encoded_state00.reshape(7, 7)

# For state1 only the four raw outputs are produced here (no reshaping).
(preprocessed_state1, encoded_state1,
 flat_encoded_state1, predicted_state1) = _autoencoder_outputs(state1)
Esempio n. 2
0
# First step of the run uses action 1.
state1, reward, done, info = env.step(1)

# Ten steps of action 3 (original note: keep the base to the left and move
# only the ball); only the state from the final iteration survives in state1.
for i in range(10):
    state1, reward, done, info = env.step(3)
# One extra step, so the ball is one step ahead of state1.
state2, reward, done, info = env.step(3)

# Nineteen more steps of action 3; state3 holds the last returned state.
for i in range(19):
    state3, reward, done, info = env.step(3)
# One extra step past state3; the original note says the ball breaks the
# wall on this step.
state4, reward, done, info = env.step(3)
# NOTE(review): state2, state3 and state4 are never read in this excerpt --
# presumably consumed later in the file; confirm.


def _autoencoder_outputs(state):
    """Feed a single raw state to the autoencoder and collect its outputs.

    ``crop_state`` is applied to the state, the result is turned into an
    array with a new leading axis of size 1, and that batch goes through
    ``AE.encode``, ``AE.flat_encode`` and ``AE.predict`` (in this order).

    Returns ``(preprocessed, encoded, flat_encoded, predicted)``.
    """
    batch = np.expand_dims(np.asarray(crop_state(state)), axis=0)
    return batch, AE.encode(batch), AE.flat_encode(batch), AE.predict(batch)


# NOTE(review): the *_state0 values are derived from state1 (same input as
# the *_state1 values below) -- confirm this is not a copy-paste slip.
(preprocessed_state0, encoded_state0,
 flat_encoded_state0, predicted_state0) = _autoencoder_outputs(state1)
predicted_state0 = predicted_state0.reshape(4, 84, 84)
conv_map0 = flat_encoded_state0.reshape(7, 7)

# NOTE(review): state00 has no assignment in this excerpt; it has to come
# from earlier in the file -- confirm.
(preprocessed_state00, encoded_state00,
 flat_encoded_state00, predicted_state00) = _autoencoder_outputs(state00)
predicted_state00 = predicted_state00.reshape(4, 84, 84)
conv_map00 = flat_encoded_state00.reshape(7, 7)

# The state1 outputs are left un-reshaped at this point.
(preprocessed_state1, encoded_state1,
 flat_encoded_state1, predicted_state1) = _autoencoder_outputs(state1)
Esempio n. 3
0
    # Command-line options. `parser` is created above this excerpt.
    parser.add_argument('--env', type=str, default='BreakoutDeterministic-v3', help='Atari environment to run')
    parser.add_argument('--episodes', type=int, default=1000, help='number of episodes to run')
    parser.add_argument('--path', type=str, default='data/model.h5', help='path to the hdf5 weights file for the AE')
    # -e / -i select what gets saved: encoded features and/or raw state images.
    parser.add_argument('-e', '--encode', action='store_true', help='save a SARS dataset with the encoded features')
    parser.add_argument('-i', '--images', action='store_true', help='save images of states and a SARS csv with ids')
    parser.add_argument('--min-score', type=int, default=0, help='keep episode only if it got more than this score')
    parser.add_argument('--onehot', action='store_true', help='save actions in the dataset with onehot encoding')
    args = parser.parse_args()

    # NOTE(review): --debug is not declared among the options visible here;
    # presumably it is added above this excerpt -- confirm.
    logger = Logger(debug=args.debug)

    # Encoded-dataset mode: load the autoencoder and collect encoded episodes.
    if args.encode:
        # Input shape is a 1-tuple of length 4 * 84 * 84; weights come from --path.
        AE = Autoencoder((4 * 84 * 84,), load_path=args.path)

        # Automatically generate headers from the output length of AE.flat_encode
        # A single all-ones input (with a leading axis of size 1) is encoded and
        # the first axis of the result is taken as the number of state features.
        # NOTE(review): this assumes flat_encode returns a 1-D array for a
        # one-sample input; if it keeps the batch axis, shape[0] would be 1 -- confirm.
        nb_states = AE.flat_encode(np.expand_dims(np.ones(AE.input_shape), axis=0)).shape[0]
        nb_actions = envs.Atari(args.env).action_space.n
        # One action column per action with --onehot, otherwise a single 'A0' column.
        # NOTE(review): xrange exists only in Python 2; this needs range under Python 3.
        actions_header = ['A%s' % i for i in xrange(nb_actions)] if args.onehot else ['A0']
        # Column layout: S* (state), action column(s), R, SS* (next state),
        # then the 'Absorbing' and 'Finished' columns.
        header = ['S%s' % i for i in xrange(nb_states)] + actions_header + ['R'] + \
                        ['SS%s' % i for i in xrange(nb_states)] + ['Absorbing', 'Finished']

        # Collect episodes
        # NOTE(review): args.video and args.njobs are not declared among the
        # options visible here; presumably they are added above -- confirm.
        dataset = collect_encoded_dataset(AE,
                                episodes=args.episodes,
                                env_name=args.env,
                                header=header,
                                onehot=args.onehot,
                                minimum_score=args.min_score,
                                video=args.video,
                                n_jobs=args.njobs)
        # File name for the encoded dataset; it is only assigned here, and its
        # use falls outside this excerpt.
        output_file = 'encoded_dataset.csv'