Python create_display_environment 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: helper_functions

메소드/함수: create_display_environment

hotexamples.com에서의 예제들: 2

Python create_display_environment - 2개의 예제를 찾았습니다. 아래는 오픈 소스 프로젝트에서 추출한, Python helper_functions.create_display_environment의 실제 사용 예제 중 평가가 가장 높은 것들입니다. 예제를 평가해 주시면 예제 품질을 개선하는 데 도움이 됩니다.

예제 #1

파일 보기

        if step < 500:
            action = pRandom()
        else:
            action = pGreedy(current_state, q_table)

        step += 1
        reward, new_state = perform_action(agent, action, drop_cells,
                                           pick_cells)
        q_table[current_state][action] = calculate_new_q(
            LEARNING_RATE, DISCOUNT, reward, action, current_state, new_state,
            q_table)
        session_reward += reward

        # visualization
        env = create_display_environment(drop_cells, pick_cells, agent)
        img = Image.fromarray(env, "RGB")
        cv2.imshow("", np.array(img))
        if (reward == PICK_UP_REWARD or reward == DROP_OFF_REWARD):
            if cv2.waitKey(WAIT_TIME) & 0xFF == ord("q"):
                break
        else:
            if cv2.waitKey(WAIT_TIME) & 0xFF == ord("q"):
                break

        # all drop off locations are filled
        if len(list(
                filter(lambda cell: cell.has_space() == False,
                       drop_cells))) == len(drop_cells) or step == HM_STEPS:
            break
    all_rewards.append(session_reward)

예제 #2

파일 보기

def experiment_2(LEARNING_RATE, DISCOUNT, HM_STEPS):
    """Run experiment 2: train a Q-table using ``sarsa_calculate_new_q``.

    Sessions are started one after another until the global step counter
    reaches ``HM_STEPS``. Each session rebuilds the same initial grid
    (four drop-off cells, two pick-up cells, agent at ``(4, 0)``) and runs
    until every drop-off cell reports no space left, the step budget is
    exhausted, or the user presses ``q`` in the display window.

    Args:
        LEARNING_RATE: Forwarded unchanged to ``sarsa_calculate_new_q``.
        DISCOUNT: Forwarded unchanged to ``sarsa_calculate_new_q``.
        HM_STEPS: Total number of steps to run, summed over all sessions.

    Returns:
        None. Side effects: renders every step in an OpenCV window, shows a
        matplotlib plot of the reward collected per session, prints the
        best session reward, and pickles the final Q-table to
        ``qtable-experiment-2.pickle`` (path relative to the working
        directory).
    """
    # Number of initial steps (counted across sessions) that use pRandom
    # before action selection switches to pExploit.
    random_policy_steps = 500

    q_table = initialize_q_table()
    step = 0
    all_rewards = []
    while step < HM_STEPS:
        # Every session starts from the same initial environment state.
        drop_cells = [
            DropOffCell(0, 0),
            DropOffCell(0, 4),
            DropOffCell(2, 2),
            DropOffCell(4, 4),
        ]
        pick_cells = [PickUpCell(2, 4), PickUpCell(3, 1)]
        agent = Agent(4, 0)

        # Step until the session reaches a terminal condition.
        session_reward = 0
        while True:
            current_state = get_state(agent, drop_cells, pick_cells)

            if step < random_policy_steps:
                action = pRandom()
            else:
                action = pExploit(current_state, q_table)

            step += 1
            reward, new_state = perform_action(agent, action, drop_cells,
                                               pick_cells)
            # The action pExploit picks for the new state is passed to the
            # update together with the action that was just taken.
            next_action = pExploit(new_state, q_table)
            q_table[current_state][action] = sarsa_calculate_new_q(
                LEARNING_RATE, DISCOUNT, reward, action, next_action,
                current_state, new_state, q_table)
            session_reward += reward

            # Visualization of the current grid.
            env = create_display_environment(drop_cells, pick_cells, agent)
            img = Image.fromarray(env, "RGB")
            cv2.imshow("", np.array(img))
            # A single wait/poll is enough: the delay and the quit check do
            # not depend on the reward. Note that "q" only leaves the
            # current session; the outer loop goes on while steps remain.
            if cv2.waitKey(WAIT_TIME) & 0xFF == ord("q"):
                break

            # Terminal: no drop-off cell has space left, or the step budget
            # has been used up.
            all_filled = not any(cell.has_space() for cell in drop_cells)
            if all_filled or step == HM_STEPS:
                break
        all_rewards.append(session_reward)

    plt.plot(list(range(len(all_rewards))), all_rewards)
    plt.ylabel("Reward Collected")
    plt.xlabel("Session")
    plt.show()

    # With HM_STEPS <= 0 no session is recorded and max() would raise
    # ValueError on the empty list.
    if all_rewards:
        print(f"max reward reached: {max(all_rewards)}")

    with open("qtable-experiment-2.pickle", "wb") as f:
        pickle.dump(q_table, f)