def main():
    """Collect random-policy transitions from BreakoutDeterministic-v4.

    Plays ``num_games`` episodes with uniformly random actions, stacking
    preprocessed frames in a 5-slot RingBuffer so that consecutive
    4-frame states (prev/next) can be sliced out, then converts the
    accumulated transition lists to numpy arrays and builds the Q-model.

    Side effects: renders the environment each step; no return value.
    """
    env = gym.make('BreakoutDeterministic-v4')
    frame = env.reset()
    env.render()

    frames_per_action = 4          # env steps taken per sampled action
    num_actions = 4                # Breakout action-space size, fed to atari_model
    ATARI_SHAPE_PLUSONE = (105, 80, 5)  # NOTE(review): unused here — presumably the stacked-state shape; confirm
    num_games = 10

    # 5 frames -> one overlapping (prev, next) pair of 4-frame states.
    this_states = RingBuffer(5)
    this_rewards = RingBuffer(4)

    all_prev_states = []
    all_next_states = []
    all_actions = []
    all_rewards = []
    all_isterminal = []

    prev_frame = preprocess(frame)
    for this_game in range(num_games):
        iter_count = 0
        is_done = False
        while not is_done:
            this_action = env.action_space.sample()
            this_action_onehot = action_to_onehot(this_action)  # NOTE(review): currently unused
            this_states.append(prev_frame)
            for action_count in range(frames_per_action):
                frame, reward, is_done, _ = env.step(this_action)
                this_states.append(preprocess(frame))
                this_rewards.append(transform_reward(reward))
                if not is_done:
                    env.render()
                else:
                    frame = env.reset()
                    env.render()
                    break
            # BUG FIX: was `prev_frame = frame`, which stored the RAW frame
            # while every other entry in this_states is preprocessed.
            # Keep the buffer homogeneous by preprocessing here too.
            prev_frame = preprocess(frame)
            if iter_count > 0:
                all_prev_states.append(this_states.clip_from_end(1))
                all_next_states.append(this_states.clip_from_start(1))
                # NOTE(review): this appends the same mutable RingBuffer
                # object every iteration — all entries alias one buffer
                # that keeps changing. Likely needs a snapshot/copy here;
                # RingBuffer's copy API isn't visible, so left as-is.
                all_rewards.append(this_rewards)
                all_actions.append(this_action)
                all_isterminal.append(int(is_done))
            iter_count += 1

    # Convert collected transitions to arrays for training.
    np_prev_states = np.asarray(all_prev_states)
    np_next_states = np.asarray(all_next_states)
    np_rewards = np.asarray(all_rewards)
    np_actions = np.asarray(all_actions)
    np_isterminal = np.asarray(all_isterminal)
    np_num_objects = np.size(np_isterminal)

    t_model = atari_model(num_actions)