Python Environment.step Examples

Programming Language: Python

Namespace/Package Name: interface

Class/Type: Environment

Method/Function: step

Examples at hotexamples.com: 3

Python Environment.step - 3 examples found. These are the top-rated real-world Python examples of interface.Environment.step, extracted from open-source projects. You can rate the examples to help us improve their quality.

Frequently Used Methods

Show Hide

Environment(7)

get_obs(6)

get_done(3)

get_map_size(3)

get_reward(3)

get_unit_num(3)

step(3)

reset(2)

set_surrender(2)

get_obs_raw(1)

Example #1

Show file

                    agent2_crash_list.append(round_cnt)
                    print('Side 2 crashed!')
                    side1_obs_raw, side2_obs_raw = env.get_obs_raw()
                    side1_detector_obs_raw_list = side1_obs_raw[
                        'detector_obs_list']
                    side1_fighter_obs_raw_list = side1_obs_raw[
                        'fighter_obs_list']
                    side1_joint_obs_raw_dict = side1_obs_raw['joint_obs_dict']
                    side2_detector_obs_raw_list = side2_obs_raw[
                        'detector_obs_list']
                    side2_fighter_obs_raw_list = side2_obs_raw[
                        'fighter_obs_list']
                    side2_joint_obs_raw_dict = side2_obs_raw['joint_obs_dict']
                else:
                    # execution
                    env.step(side1_detector_action, side1_fighter_action,
                             side2_detector_action, side2_fighter_action)
                    # obs
                    side1_obs_raw, side2_obs_raw = env.get_obs_raw()
                    side1_detector_obs_raw_list = side1_obs_raw[
                        'detector_obs_list']
                    side1_fighter_obs_raw_list = side1_obs_raw[
                        'fighter_obs_list']
                    side1_joint_obs_raw_dict = side1_obs_raw['joint_obs_dict']
                    side2_detector_obs_raw_list = side2_obs_raw[
                        'detector_obs_list']
                    side2_fighter_obs_raw_list = side2_obs_raw[
                        'fighter_obs_list']
                    side2_joint_obs_raw_dict = side2_obs_raw['joint_obs_dict']

                    # 观测列表测试
                    # wait = 0

Example #2

Show file

File: tournament_mp.py Project: gingkg/pymarl_MaCA

def run(agent1_name,
        agent2_name,
        map_name,
        round_num,
        max_step,
        random_pos=False):
    """Play ``round_num`` rounds between two agents on one map and tally results.

    The map is read from ``maps/<map_name>.map`` and each agent from
    ``agent/<agent_name>/agent.py``; the process exits with status -1 if any
    of these files is missing.

    :param agent1_name: name of the side-1 (red) agent
    :param agent2_name: name of the side-2 (blue) agent
    :param map_name: name of the map
    :param round_num: number of rounds to play
    :param max_step: maximum number of steps in a single round
    :param random_pos: whether to use random starting positions
    :return: agent1_win_times, agent2_win_times, draw_times, agent1_crash_times, agent2_crash_times, agent1_timeout_times, agent2_timeout_times, agent1_launch_failure_times, agent2_launch_failure_times
    """
    side1_win_times = 0
    side2_win_times = 0
    draw_times = 0
    log_flag = agent1_name + '_vs_' + agent2_name
    agent1_crash_list = []
    agent2_crash_list = []
    agent1_timeout_list = []
    agent2_timeout_list = []

    # Build the file paths and validate them before creating anything.
    map_path = 'maps/' + map_name + '.map'
    agent1_path = 'agent/' + agent1_name + '/agent.py'
    agent2_path = 'agent/' + agent2_name + '/agent.py'

    if not os.path.exists(map_path):
        print('Error: map file not exist!')
        exit(-1)
    if not os.path.exists(agent1_path):
        print('Error: agent1 file not exist!')
        exit(-1)
    if not os.path.exists(agent2_path):
        print('Error: agent2 file not exist!')
        exit(-1)

    # Make the environment; both sides use the 'raw' observation setting.
    env = Environment(map_path,
                      'raw',
                      'raw',
                      max_step=max_step,
                      render=True,
                      random_pos=random_pos,
                      log=log_flag)
    # Map info is needed to construct the agent controllers.
    size_x, size_y = env.get_map_size()
    (side1_detector_num, side1_fighter_num,
     side2_detector_num, side2_fighter_num) = env.get_unit_num()

    agent1 = AgentCtrl(agent1_name, size_x, size_y, side1_detector_num,
                       side1_fighter_num)
    agent2 = AgentCtrl(agent2_name, size_x, size_y, side2_detector_num,
                       side2_fighter_num)

    # Initialise BOTH agents before terminating either one. Terminating
    # agent2 as soon as agent1 fails (and only then calling
    # agent2.agent_init()) would initialise an already-terminated agent.
    # NOTE(review): presumably that makes agent2's init fail as well --
    # confirm against AgentCtrl.
    agent1_launch_failed = not agent1.agent_init()
    if agent1_launch_failed:
        print('ERROR: Agent1 ' + agent1_name + ' init failed!')
    agent2_launch_failed = not agent2.agent_init()
    if agent2_launch_failed:
        print('ERROR: Agent2 ' + agent2_name + ' init failed!')

    # If one side fails to launch, it is treated as losing every round and
    # the launch failure is counted round_num times; if both sides fail, all
    # round_num rounds are counted as draws.
    if agent1_launch_failed or agent2_launch_failed:
        agent1.terminate()
        agent2.terminate()
        if agent1_launch_failed and agent2_launch_failed:
            return 0, 0, round_num, 0, 0, 0, 0, round_num, round_num
        if agent1_launch_failed:
            return 0, round_num, 0, 0, 0, 0, 0, round_num, 0
        return round_num, 0, 0, 0, 0, 0, 0, 0, round_num

    # execution
    # input("Press the <ENTER> key to continue...")
    try:
        for round_idx in range(round_num):
            # The freshly built environment is used for the first round;
            # later rounds reset it.
            if round_idx != 0:
                env.reset()
            round_cnt = round_idx + 1
            step_cnt = 0
            while True:
                step_cnt += 1
                side1_obs_dict, side2_obs_dict = env.get_obs()

                # Result code 0 yields usable actions; 1 is recorded as a
                # crash and 2 as a timeout for the current round.
                agent1_action, agent1_result = agent1.get_action(
                    side1_obs_dict, step_cnt)
                if agent1_result == 0:
                    side1_detector_action = agent1_action['detector_action']
                    side1_fighter_action = agent1_action['fighter_action']
                elif agent1_result == 1:
                    agent1_crash_list.append(round_cnt)
                elif agent1_result == 2:
                    agent1_timeout_list.append(round_cnt)

                agent2_action, agent2_result = agent2.get_action(
                    side2_obs_dict, step_cnt)
                if agent2_result == 0:
                    side2_detector_action = agent2_action['detector_action']
                    side2_fighter_action = agent2_action['fighter_action']
                elif agent2_result == 1:
                    agent2_crash_list.append(round_cnt)
                elif agent2_result == 2:
                    agent2_timeout_list.append(round_cnt)

                # Step the environment only when both sides produced
                # actions; otherwise signal a surrender (2 when both sides
                # failed, 0 when only side 1 failed, 1 when only side 2
                # failed).
                if agent1_result == 0 and agent2_result == 0:
                    env.step(side1_detector_action, side1_fighter_action,
                             side2_detector_action, side2_fighter_action)
                elif agent1_result != 0 and agent2_result != 0:
                    env.set_surrender(2)
                elif agent1_result != 0:
                    env.set_surrender(0)
                else:
                    env.set_surrender(1)

                if env.get_done():
                    # Only the two game-level rewards decide the round.
                    (_, _, side1_game_reward,
                     _, _, side2_game_reward) = env.get_reward()
                    if side1_game_reward > side2_game_reward:
                        side1_win_times += 1
                    elif side1_game_reward < side2_game_reward:
                        side2_win_times += 1
                    else:
                        draw_times += 1
                    break
    finally:
        # Always shut the agents down, even if a round raised.
        agent1.terminate()
        agent2.terminate()
    return (side1_win_times, side2_win_times, draw_times,
            len(agent1_crash_list), len(agent2_crash_list),
            len(agent1_timeout_list), len(agent2_timeout_list), 0, 0)

Example #3

Show file

                if red_obs_dict['fighter'][y]['alive']:
                    obs_got_ind[y] = True
                    # tmp_img_obs = red_obs_dict['fighter'][y]['screen']
                    # tmp_img_obs = tmp_img_obs.transpose(2, 0, 1)
                    tmp_info_obs = red_obs_dict['fighter'][y]['info']
                    tmp_action = fighter_model.choose_action(tmp_info_obs)
                    obs_list.append({'info': copy.deepcopy(tmp_info_obs)})
                    action_list.append(tmp_action)
                    # action formation
                    true_action[0] = int(360 / COURSE_NUM *
                                         int(tmp_action[0] / ATTACK_IND_NUM))
                    true_action[3] = int(tmp_action[0] % ATTACK_IND_NUM)
                red_fighter_action.append(true_action)
            red_fighter_action = np.array(red_fighter_action)
            # step
            env.step(red_detector_action, red_fighter_action,
                     blue_detector_action, blue_fighter_action)
            # get reward
            red_detector_reward, red_fighter_reward, red_game_reward, blue_detector_reward, blue_fighter_reward, blue_game_reward = env.get_reward(
            )
            detector_reward = red_detector_reward + red_game_reward
            fighter_reward = red_fighter_reward + red_game_reward

            episodic_reward += sum(fighter_reward)
            # save replay
            red_obs_dict, blue_obs_dict = env.get_obs()
            for y in range(red_fighter_num):
                if obs_got_ind[y]:
                    # tmp_img_obs = red_obs_dict['fighter'][y]['screen']
                    # tmp_img_obs = tmp_img_obs.transpose(2, 0, 1)
                    tmp_info_obs = red_obs_dict['fighter'][y]['info']
                    fighter_model.store_transition(