예제 #1
0
def main():
    """Collect random-play Breakout transitions and instantiate the model.

    Plays ``num_games`` episodes of ``BreakoutDeterministic-v4`` with actions
    drawn from ``env.action_space.sample()``.  Each sampled action is repeated
    for up to ``frames_per_action`` environment steps (fewer if the episode
    ends).  After every sampled action except the first one of each game, one
    transition is recorded: the previous-state clip, the next-state clip, the
    reward buffer, the action and the terminal flag.  The recorded lists are
    then converted to NumPy arrays and ``atari_model(num_actions)`` is built.
    """
    env = gym.make('BreakoutDeterministic-v4')
    frame = env.reset()
    env.render()

    frames_per_action = 4
    num_actions = 4
    # Not referenced in the visible part of this function.
    # Presumably (height, width, stacked frames) of a state clip - TODO confirm.
    ATARI_SHAPE_PLUSONE = (105, 80, 5)
    num_games = 10

    # Rolling windows: one leading frame plus frames_per_action new frames,
    # and one reward per repeated step.
    this_states = RingBuffer(5)
    this_rewards = RingBuffer(4)

    all_prev_states = []
    all_next_states = []
    all_actions = []
    all_rewards = []
    all_isterminal = []

    prev_frame = preprocess(frame)
    for this_game in range(num_games):
        iter_count = 0
        is_done = False
        while not is_done:
            this_action = env.action_space.sample()
            # Computed but not used in the visible code; all_actions below
            # stores this_action itself.
            this_action_onehot = action_to_onehot(this_action)
            this_states.append(prev_frame)
            for action_count in range(frames_per_action):
                frame, reward, is_done, _ = env.step(this_action)
                this_states.append(preprocess(frame))
                this_rewards.append(transform_reward(reward))
                if is_done:
                    # Episode over: start the next one and stop repeating
                    # the action.
                    frame = env.reset()
                    env.render()
                    break
                env.render()
            # Every frame pushed into this_states is preprocessed, so the
            # carried-over frame must be preprocessed too (it was previously
            # stored raw, mixing raw and preprocessed frames in the buffer).
            prev_frame = preprocess(frame)
            if iter_count > 0:
                # presumably clip_from_end(1) / clip_from_start(1) drop one
                # frame from the respective end of the window - verify
                # against RingBuffer.
                all_prev_states.append(this_states.clip_from_end(1))
                all_next_states.append(this_states.clip_from_start(1))
                # NOTE(review): this appends the same this_rewards object on
                # every step, so all entries may alias one buffer - confirm
                # and store a snapshot if RingBuffer does not copy.
                all_rewards.append(this_rewards)
                all_actions.append(this_action)
                all_isterminal.append(int(is_done))
            iter_count += 1

    np_prev_states = np.asarray(all_prev_states)
    np_next_states = np.asarray(all_next_states)
    np_rewards = np.asarray(all_rewards)
    np_actions = np.asarray(all_actions)
    np_isterminal = np.asarray(all_isterminal)

    # Number of recorded transitions (one terminal flag per transition).
    np_num_objects = np.size(np_isterminal)

    t_model = atari_model(num_actions)