Python QLearningAgent 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: agents.q_agent_1teammate_v1.qagent

클래스/타입: QLearningAgent

hotexamples.com에서의 예제들: 5

Python QLearningAgent - 5개의 예제가 발견되었습니다. 이것들은 오픈소스 프로젝트에서 추출된 Python의 agents.q_agent_1teammate_v1.qagent.QLearningAgent에 대한 실세계 최고 등급의 예제들입니다. 예제들을 평가하여 예제의 품질 향상에 도움을 줄 수 있습니다.

자주 사용되는 메소드들

보기 숨기기

QLearningAgent(2)

act(2)

load_q_table(2)

counter_exploitations(1)

counter_explorations(1)

learn_buffer(1)

store_ep(1)

update_hyper_parameters(1)

예제 #1

파일 보기

파일: train_player_w_static.py 프로젝트: pedMatias/matias_hfo

def test(num_episodes: int, game_interface: HFOAttackingPlayer,
         features: DiscreteFeatures1TeammateV1, agent: QLearningAgent,
         actions: DiscreteActions1TeammateV1, reward_funct) -> float:
    """
    Play evaluation episodes with the given agent and report how many of
    them ended with a reward equal to 1 (counted as a goal).

    No learning call is made on the agent here: it is only asked for an
    action through agent.act().

    @param num_episodes: number of episodes to run
    @param game_interface: game interface, that manages interactions
    between both;
    @param features: features interface, from the observation array, gets the
    main features for the agent;
    @param agent: learning agent;
    @param actions: actions interface;
    @param reward_funct: reward function used; called with the status
    returned by execute_action
    @return: (float) num_goals / num_episodes, or 0.0 when num_episodes is
    not positive
    @raise NoActionPlayedError: when an episode finishes with OUT_OF_TIME
    after fewer than 5 actions were chosen
    """
    num_goals = 0
    # Number of episodes that were started (the one interrupted by a server
    # shutdown included). Tracked separately from the loop variable so the
    # summary below is well defined even when the loop body never runs.
    episodes_counted = 0
    for ep in range(num_episodes):
        episodes_counted = ep + 1
        # Check if server still up:
        if game_interface.hfo.step() == SERVER_DOWN:
            print("Server is down while testing; episode={}".format(ep))
            break
        # Go to origin position:
        features.update_features(game_interface.get_state())
        go_to_origin_position(game_interface=game_interface,
                              features=features,
                              actions=actions)
        # Test loop:
        debug_counter = 0  # TODO remove
        # Reset per episode: if the game is already over when the loop is
        # reached, the values of a previous episode must not be reused (and
        # must not be unbound on the very first episode).
        status = None
        reward = None
        while game_interface.in_game():
            # Update environment features:
            curr_state_id = features.get_state_index()
            has_ball = features.has_ball()

            # Act:
            debug_counter += 1
            action_idx = agent.act(curr_state_id)
            action_name = actions.map_action_to_str(action_idx, has_ball)
            print("Agent playing {}".format(action_name))

            # Step:
            status = execute_action(action_name=action_name,
                                    features=features,
                                    game_interface=game_interface)

            # Reward of the step just played:
            reward = reward_funct(status)

        # Only the reward of the last step of the episode decides a goal.
        if reward == 1:
            num_goals += 1

        if status == OUT_OF_TIME and debug_counter < 5:
            raise NoActionPlayedError(
                "agent was only able to choose {}".format(debug_counter))
        # Game Reset
        game_interface.reset()

    if episodes_counted:
        observed_win_rate = num_goals / episodes_counted
    else:
        observed_win_rate = 0.0
    print("<<TEST>> NUM Goals = ", num_goals)
    print("<<TEST>> NUM episodes = ", episodes_counted)
    print("<<TEST>> AVR win rate = ", observed_win_rate)
    # The returned rate is relative to the requested number of episodes.
    if num_episodes > 0:
        return num_goals / num_episodes
    return 0.0

예제 #2

파일 보기

def train(num_train_episodes: int, num_total_train_ep: int,
          game_interface: HFOAttackingPlayer,
          features: DiscreteFeatures1TeammateV1, agent: QLearningAgent,
          actions: DiscreteActions1TeammateV1, reward_funct):
    """
    Run one training iteration: play episodes, store every transition in the
    agent, and let the agent learn from its buffer at the end of each
    episode.

    The agent's counter_explorations and counter_exploitations attributes
    are reset to 0 before the first episode and are read afterwards to print
    the share of explorations.

    @param num_train_episodes: number of episodes to train in this iteration
    @param num_total_train_ep: number total of episodes to train; forwarded
    to agent.update_hyper_parameters
    @param game_interface: game interface, that manages interactions
    between both;
    @param features: features interface, from the observation array, gets
    the main features for the agent;
    @param agent: learning agent;
    @param actions: actions interface;
    @param reward_funct: reward function used; called with the status
    returned by execute_action
    @return: (QLearningAgent) the same agent instance that was passed in
    @raise ServerDownError: when the server reports SERVER_DOWN at the start
    of an episode
    @raise NoActionPlayedError: when an episode finishes with OUT_OF_TIME
    after fewer than 5 actions were chosen
    """
    sum_score = 0
    agent.counter_explorations = 0
    agent.counter_exploitations = 0
    for ep in range(num_train_episodes):
        # Check if server still up:
        if game_interface.hfo.step() == SERVER_DOWN:
            raise ServerDownError("training; episode={}".format(ep))
        # Go to origin position:
        features.update_features(game_interface.get_state())
        go_to_origin_position(game_interface=game_interface,
                              features=features,
                              actions=actions)
        # Start learning loop
        debug_counter = 0  # TODO remove
        while game_interface.in_game():
            # Update environment features:
            curr_state_id = features.get_state_index()
            has_ball = features.has_ball()

            # Act:
            debug_counter += 1
            action_idx = agent.act(curr_state_id)
            action_name = actions.map_action_to_str(action_idx, has_ball)

            # Step:
            status = execute_action(action_name=action_name,
                                    features=features,
                                    game_interface=game_interface)

            # Store the transition; "done" marks the step that ended the
            # episode.
            reward = reward_funct(status)
            sum_score += reward
            new_state_id = features.get_state_index()
            agent.store_ep(state_idx=curr_state_id,
                           action_idx=action_idx,
                           reward=reward,
                           next_state_idx=new_state_id,
                           has_ball=has_ball,
                           done=not game_interface.in_game())
        if game_interface.get_game_status() == OUT_OF_TIME:
            if debug_counter < 5:
                raise NoActionPlayedError(
                    "agent was only able to choose {}".format(debug_counter))
        agent.learn_buffer()
        agent.update_hyper_parameters(num_total_episodes=num_total_train_ep)
        # Game Reset
        game_interface.reset()

    # Summary. Both ratios are guarded so that an iteration without episodes
    # (or without any counted action) reports 0 instead of dividing by zero.
    if num_train_episodes > 0:
        avr_reward = sum_score / num_train_episodes
    else:
        avr_reward = 0.0
    print("<<TRAIN>> AVR reward = ", avr_reward)

    num_choices = agent.counter_exploitations + agent.counter_explorations
    if num_choices:
        exploration_ratio = agent.counter_explorations / num_choices
    else:
        exploration_ratio = 0.0
    print("<<TRAIN>> %Explorations={}% ".format(
        round(exploration_ratio, 4) * 100))
    return agent

예제 #3

파일 보기

    hfo_interface = HFOAttackingPlayer(num_opponents=num_op,
                                       num_teammates=num_team)
    hfo_interface.connect_to_server()
    print("Starting Training - id={}; num_opponents={}; num_teammates={}; "
          "num_episodes={};".format(hfo_interface.hfo.getUnum(), num_op,
                                    num_team, num_episodes))

    # Agent set-up
    reward_function = basic_reward
    features_manager = DiscreteFeatures1TeammateV1(num_team, num_op)
    actions_manager = DiscreteActions1TeammateV1()

    # Q Agent:
    agent = QLearningAgent(num_states=features_manager.get_num_states(),
                           num_actions=actions_manager.get_num_actions(),
                           learning_rate=0.1,
                           discount_factor=0.9,
                           epsilon=0.6,
                           final_epsilon=0.1)
    agent.load_q_table(model_file)
    # save original q_table
    save_model(q_table=agent.q_table,
               file_name="original_model",
               directory=save_dir)

    # Test one first time without previous train:
    av_reward = test(num_episodes=num_test_ep,
                     agent=agent,
                     game_interface=hfo_interface,
                     features=features_manager,
                     actions=actions_manager,
                     reward_funct=reward_function)

예제 #4

파일 보기

파일: train_player_w_static.py 프로젝트: pedMatias/matias_hfo

    # Initialize connection with the HFO server
    hfo_interface = HFOAttackingPlayer(num_opponents=num_op,
                                       num_teammates=num_team)
    hfo_interface.connect_to_server()
    print("Starting Training - id={}; num_opponents={}; num_teammates={}; "
          "num_episodes={};".format(hfo_interface.hfo.getUnum(), num_op,
                                    num_team, num_episodes))

    # Agent set-up
    reward_function = basic_reward
    features_manager = DiscreteFeatures1TeammateV1(num_team, num_op)
    actions_manager = DiscreteActions1TeammateV1()
    agent = QLearningAgent(num_states=features_manager.get_num_states(),
                           num_actions=actions_manager.get_num_actions(),
                           learning_rate=0.1,
                           discount_factor=0.9,
                           epsilon=0.8)

    # Test one first time without previous train:
    av_reward = test(num_episodes=num_test_ep,
                     agent=agent,
                     game_interface=hfo_interface,
                     features=features_manager,
                     actions=actions_manager,
                     reward_funct=reward_function)
    # Save metrics structures
    trained_eps_list = [0]
    avr_epsilons_list = [agent.epsilon]
    avr_rewards_list = [av_reward]
    qlearning_variation_list = [0]

예제 #5

파일 보기

    args = parser.parse_args()
    num_team = args.num_teammates
    num_op = args.num_opponents
    num_episodes = args.num_ep
    load_file = args.load_file

    print("Q Table file: ", load_file)
    print("Starting Test - num_opponents={}; num_teammates={}; "
          "num_episodes={};".format(num_op, num_team, num_episodes))
    # Initialize connection with the HFO server
    hfo_interface = HFOAttackingPlayer(num_opponents=num_op,
                                       num_teammates=num_team)
    hfo_interface.connect_to_server()

    # Agent set-up
    reward_function = basic_reward
    features_manager = DiscreteFeatures1TeammateV1(num_team, num_op)
    actions_manager = DiscreteActions1TeammateV1()
    agent = QLearningAgent(num_states=features_manager.get_num_states(),
                           num_actions=actions_manager.get_num_actions())
    agent.load_q_table(load_file)

    # Run training using Q-Learning
    av_reward = test(num_episodes=num_episodes,
                     agent=agent,
                     game_interface=hfo_interface,
                     features=features_manager,
                     actions=actions_manager,
                     reward_funct=reward_function)
    print("Av reward = {}".format(av_reward))