import numpy as np

# env, INPUT_SIZE and build_input are assumed to be defined earlier in the module.


def prepare_input(parking_states):
    # Encode each state as an (INPUT_SIZE, 1) column, stack the columns
    # side by side, then transpose so every row holds one encoded state.
    result = build_input(env, parking_states[0]).reshape(INPUT_SIZE, 1)
    for i in range(1, len(parking_states)):
        result = np.column_stack(
            (result, build_input(env, parking_states[i]).reshape(INPUT_SIZE, 1)))
    return result.T
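# Usage sketch for the batched variant (the state IDs below are hypothetical,
# purely for illustration): three states come back as one (3, INPUT_SIZE)
# array, one encoded state per row.
#
#   batch = prepare_input([0, 1, 2])
#   batch.shape  # -> (3, INPUT_SIZE)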
def prepare_input(current_state):
    # Single-state variant: encode one state as a (1, INPUT_SIZE) row vector.
    return build_input(env, current_state).reshape((1, INPUT_SIZE))
if __name__ == '__main__':
    network = DqnNetwork(INPUT_SIZE)
    env.reset()
    new_state = env.s
    env.render()
    total_wins = 0
    steps_to_win = 0
    print_values(env.s)
    iteration = 0
    while True:
        k = 0
        frame = build_input(env, new_state)
        end = False
        print("----NEW ROUND----")
        my_reward = 0
        states = list()
        rewards = list()
        iteration += 1
        while not end:
            # Act greedily on the network's current Q-value estimates.
            action = network.predict(frame)
            max_action = np.argmax(action)
            old_state = new_state
            new_state, reward, done, info = env.step(max_action)
            k += 1
            steps_to_win += 1
            # The environment's reward is replaced by a custom shaping
            # function that also decides when the episode ends.
            my_reward, end = calculate_reward(old_state, new_state, k)
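# calculate_reward is defined elsewhere in the project; the loop above only
# relies on it returning a shaped reward plus an end-of-episode flag. Below is
# a hypothetical sketch of that contract (goal_state, max_steps and the reward
# values are assumptions, not taken from the original source):
def _calculate_reward_sketch(old_state, new_state, k, goal_state=0, max_steps=200):
    if new_state == goal_state:
        return 1.0, True    # reached the target parking state
    if k >= max_steps:
        return -1.0, True   # cut off episodes that run too long
    return -0.01, False     # small per-step penalty to encourage short paths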
def prepare_input(current_state, next_state):
    # Pair variant: stack the encodings of the current and next state
    # into a (2, INPUT_SIZE) array, one row per state.
    return np.column_stack(
        (build_input(env, current_state), build_input(env, next_state))).T
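# Shape check for the pair variant (hypothetical state IDs, for illustration
# only): the current and next state come back as a (2, INPUT_SIZE) array.
#
#   pair = prepare_input(0, 1)
#   pair.shape  # -> (2, INPUT_SIZE)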
if __name__ == '__main__':
    network = McNetwork(INPUT_SIZE)
    env.reset()
    env.render()
    total_wins = 0
    iteration = 0
    performance = list()
    averages = list()
    avg = 0
    last_plot_point = 0
    episodes = 0
    while True:
        new_state = env.s
        k = 0
        frame = build_input(env, new_state)
        end = False
        print("----NEW ROUND----")
        env.render()
        my_reward = 0
        iteration += 1
        states = list()
        rewards = list()
        while not end:
            action = network.predict(frame)
            max_action = np.argmax(action)
            old_state = new_state
            new_state, reward, done, info = env.step(max_action)
            k += 1
            my_reward, end, reset = calculate_reward(old_state, new_state, k,