Example #1
def prepare_input(parking_states):
    # Encode each state as an (INPUT_SIZE, 1) column, stack the columns,
    # and transpose so every row of the result is one encoded state.
    columns = [build_input(env, state).reshape(INPUT_SIZE, 1)
               for state in parking_states]
    return np.column_stack(columns).T
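A minimal, self-contained sketch of how this batch encoder could be exercised. Everything project-specific here is an assumption: the one-hot build_input below is only a stand-in for the project's real encoder, and INPUT_SIZE = 500 simply mirrors the 500 discrete states of gym's Taxi-v3, which smart-taxi appears to target.

import numpy as np

INPUT_SIZE = 500  # assumption: one-hot width (Taxi-v3 has 500 discrete states)

def build_input(env, state):
    # Hypothetical stand-in for the project's encoder: a one-hot vector
    # over the discrete state index.
    one_hot = np.zeros(INPUT_SIZE)
    one_hot[state] = 1.0
    return one_hot

def prepare_input(parking_states):
    columns = [build_input(None, state).reshape(INPUT_SIZE, 1)
               for state in parking_states]
    return np.column_stack(columns).T

batch = prepare_input([0, 42, 499])
print(batch.shape)  # (3, 500): one row per encoded state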
Example #2
File: dqn.py Project: prodand/smart-taxi
def prepare_input(current_state):
    # Encode a single state as a (1, INPUT_SIZE) row, i.e. a batch of one.
    return build_input(env, current_state).reshape((1, INPUT_SIZE))
Example #3
File: dqn.py Project: prodand/smart-taxi
def prepare_input(current_state):
    return build_input(env, current_state).reshape((1, INPUT_SIZE))


if __name__ == '__main__':
    network = DqnNetwork(INPUT_SIZE)

    env.reset()
    new_state = env.s
    env.render()
    total_wins = 0
    steps_to_win = 0
    print_values(env.s)
    iteration = 0
    while True:
        k = 0
        frame = build_input(env, new_state)
        end = False
        print("----NEW ROUND----")
        my_reward = 0
        states = list()
        rewards = list()
        iteration += 1
        while not end:
            action = network.predict(frame)  # per-action scores from the network
            max_action = np.argmax(action)   # greedy: take the best-scoring action
            old_state = new_state
            new_state, reward, done, info = env.step(max_action)
            k += 1
            steps_to_win += 1
            my_reward, end = calculate_reward(old_state, new_state, k)
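The loop above rolls episodes out greedily: network.predict scores the actions and argmax picks one, using the classic gym step API that returns a 4-tuple. Below is a self-contained sketch of that rollout pattern under assumed versions: gym below 0.26 (where step() still returns (obs, reward, done, info)) and a random stub standing in for DqnNetwork.predict.

import numpy as np
import gym  # assumption: gym < 0.26, so step() returns (obs, reward, done, info)

env = gym.make("Taxi-v3")
state = env.reset()

def predict(state):
    # Hypothetical stand-in for network.predict: random per-action scores.
    return np.random.rand(env.action_space.n)

done = False
while not done:
    scores = predict(state)
    action = int(np.argmax(scores))  # greedy pick, as in the loop above
    state, reward, done, info = env.step(action)
print("episode finished, last reward:", reward)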
Example #4
def prepare_input(current_state, next_state):
    # Stack the current- and next-state encodings as two rows of one batch.
    return np.column_stack(
        (build_input(env, current_state), build_input(env, next_state))).T
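Transposing the column stack yields a (2, INPUT_SIZE) batch: row 0 is the current state, row 1 the next. Presumably this lets a TD-style update evaluate both states in a single forward pass, though that reading is an inference from the signature, not something the snippet itself shows.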
Example #5
if __name__ == '__main__':
    network = McNetwork(INPUT_SIZE)
    env.reset()
    env.render()
    total_wins = 0
    iteration = 0
    performance = list()
    averages = list()
    avg = 0
    last_plot_point = 0
    episodes = 0
    while True:
        new_state = env.s
        k = 0
        frame = build_input(env, new_state)
        end = False

        print("----NEW ROUND----")
        env.render()
        my_reward = 0
        iteration += 1
        states = list()
        rewards = list()
        while not end:
            action = network.predict(frame)
            max_action = np.argmax(action)
            old_state = new_state
            new_state, reward, done, info = env.step(max_action)
            k += 1
            my_reward, end, reset = calculate_reward(old_state, new_state, k,