Example #1
import numpy as np
import tree  # pip install dm_tree
import unittest

# Spaces used below (gym on older Ray versions, gymnasium on newer ones).
from gym.spaces import Box, Dict, Discrete, MultiDiscrete, Tuple

import ray
from ray.rllib.algorithms.bc import BC
from ray.rllib.algorithms.pg import PG, DEFAULT_CONFIG
from ray.rllib.examples.env.random_env import RandomEnv
from ray.rllib.offline.json_reader import JsonReader
from ray.rllib.utils.test_utils import framework_iterator

SPACES = {
    "dict":
    Dict({
        "a":
        Dict({
            "aa": Box(-1.0, 1.0, shape=(3, )),
            "ab": MultiDiscrete([4, 3]),
        }),
        "b":
        Discrete(3),
        "c":
        Tuple([Box(0, 10, (2, ), dtype=np.int32),
               Discrete(2)]),
        "d":
        Box(0, 3, (), dtype=np.int64),
    }),
    "tuple":
    Tuple([
        Tuple([
            Box(-1.0, 1.0, shape=(2, )),
            Discrete(3),
Example #2
def check_support(alg, config, stats, check_bounds=False, name=None):
    covered_a = set()
    covered_o = set()
    config["log_level"] = "ERROR"
    first_error = None
    torch = config.get("use_pytorch", False)
    for a_name, action_space in ACTION_SPACES_TO_TEST.items():
        for o_name, obs_space in OBSERVATION_SPACES_TO_TEST.items():
            print("=== Testing {} (torch={}) A={} S={} ===".format(
                alg, torch, action_space, obs_space))
            config.update(
                dict(env_config=dict(action_space=action_space,
                                     observation_space=obs_space,
                                     reward_space=Box(
                                         1.0, 1.0, shape=(), dtype=np.float32),
                                     p_done=1.0,
                                     check_action_bounds=check_bounds)))
            stat = "ok"
            a = None
            try:
                if a_name in covered_a and o_name in covered_o:
                    stat = "skip"  # speed up tests by avoiding full grid
                else:
                    a = get_agent_class(alg)(config=config, env=RandomEnv)
                    if alg not in ["DDPG", "ES", "ARS", "SAC"]:
                        if o_name in ["atari", "image"]:
                            if torch:
                                assert isinstance(a.get_policy().model,
                                                  TorchVisionNetV2)
                            else:
                                assert isinstance(a.get_policy().model,
                                                  VisionNetV2)
                        elif o_name in ["vector", "vector2"]:
                            if torch:
                                assert isinstance(a.get_policy().model,
                                                  TorchFCNetV2)
                            else:
                                assert isinstance(a.get_policy().model,
                                                  FCNetV2)
                    a.train()
                    covered_a.add(a_name)
                    covered_o.add(o_name)
            except UnsupportedSpaceException:
                stat = "unsupported"
            except Exception as e:
                stat = "ERROR"
                print(e)
                print(traceback.format_exc())
                first_error = first_error if first_error is not None else e
            finally:
                if a:
                    try:
                        a.stop()
                    except Exception as e:
                        print("Ignoring error stopping agent", e)
                        pass
            print(stat)
            print()
            stats[name or alg, a_name, o_name] = stat

    # If anything happened, raise error.
    if first_error is not None:
        raise first_error
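
A minimal usage sketch for check_support above; the algorithm name and config values are placeholders, and ACTION_SPACES_TO_TEST / OBSERVATION_SPACES_TO_TEST are assumed to be defined at module level as in Example #25:

# Hypothetical driver code: collect one outcome per (algorithm, action space,
# observation space) combination and print the grid of results.
stats = {}
check_support("PPO", {"num_workers": 0, "train_batch_size": 100}, stats)
for (alg, a_name, o_name), outcome in sorted(stats.items()):
    print("{:>6} action={:<14} obs={:<10} -> {}".format(alg, a_name, o_name, outcome))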
Example #3
    def __init__(self, obs_space, action_space, num_outputs, model_config,
                 name):
        self.original_space = (obs_space.original_space if hasattr(
            obs_space, "original_space") else obs_space)

        self.processed_obs_space = (
            self.original_space
            if model_config.get("_disable_preprocessor_api") else obs_space)

        nn.Module.__init__(self)
        TorchModelV2.__init__(self, self.original_space, action_space,
                              num_outputs, model_config, name)

        self.flattened_input_space = flatten_space(self.original_space)

        # Atari type CNNs or IMPALA type CNNs (with residual layers)?
        # self.cnn_type = self.model_config["custom_model_config"].get(
        #     "conv_type", "atari")

        # Build the CNN(s) given obs_space's image components.
        self.cnns = {}
        self.one_hot = {}
        self.flatten_dims = {}
        self.flatten = {}
        concat_size = 0
        for i, component in enumerate(self.flattened_input_space):
            # Image space.
            if len(component.shape) == 3:
                config = {
                    "conv_filters":
                    model_config["conv_filters"] if "conv_filters"
                    in model_config else get_filter_config(obs_space.shape),
                    "conv_activation":
                    model_config.get("conv_activation"),
                    "post_fcnet_hiddens": [],
                }
                # if self.cnn_type == "atari":
                self.cnns[i] = ModelCatalog.get_model_v2(
                    component,
                    action_space,
                    num_outputs=None,
                    model_config=config,
                    framework="torch",
                    name="cnn_{}".format(i),
                )
                # TODO (sven): add IMPALA-style option.
                # else:
                #    cnn = TorchImpalaVisionNet(
                #        component,
                #        action_space,
                #        num_outputs=None,
                #        model_config=config,
                #        name="cnn_{}".format(i))

                concat_size += self.cnns[i].num_outputs
                self.add_module("cnn_{}".format(i), self.cnns[i])
            # Discrete|MultiDiscrete inputs -> One-hot encode.
            elif isinstance(component, (Discrete, MultiDiscrete)):
                if isinstance(component, Discrete):
                    size = component.n
                else:
                    size = sum(component.nvec)
                config = {
                    "fcnet_hiddens": model_config["fcnet_hiddens"],
                    "fcnet_activation": model_config.get("fcnet_activation"),
                    "post_fcnet_hiddens": [],
                }
                self.one_hot[i] = ModelCatalog.get_model_v2(
                    Box(-1.0, 1.0, (size, ), np.float32),
                    action_space,
                    num_outputs=None,
                    model_config=config,
                    framework="torch",
                    name="one_hot_{}".format(i),
                )
                concat_size += self.one_hot[i].num_outputs
            # Everything else (1D Box).
            else:
                size = int(np.prod(component.shape))
                config = {
                    "fcnet_hiddens": model_config["fcnet_hiddens"],
                    "fcnet_activation": model_config.get("fcnet_activation"),
                    "post_fcnet_hiddens": [],
                }
                self.flatten[i] = ModelCatalog.get_model_v2(
                    Box(-1.0, 1.0, (size, ), np.float32),
                    action_space,
                    num_outputs=None,
                    model_config=config,
                    framework="torch",
                    name="flatten_{}".format(i),
                )
                self.flatten_dims[i] = size
                concat_size += self.flatten[i].num_outputs

        # Optional post-concat FC-stack.
        post_fc_stack_config = {
            "fcnet_hiddens": model_config.get("post_fcnet_hiddens", []),
            "fcnet_activation": model_config.get("post_fcnet_activation",
                                                 "relu"),
        }
        self.post_fc_stack = ModelCatalog.get_model_v2(
            Box(float("-inf"),
                float("inf"),
                shape=(concat_size, ),
                dtype=np.float32),
            self.action_space,
            None,
            post_fc_stack_config,
            framework="torch",
            name="post_fc_stack",
        )

        # Actions and value heads.
        self.logits_layer = None
        self.value_layer = None
        self._value_out = None

        if num_outputs:
            # Action-distribution head.
            self.logits_layer = SlimFC(
                in_size=self.post_fc_stack.num_outputs,
                out_size=num_outputs,
                activation_fn=None,
                initializer=torch_normc_initializer(0.01),
            )
            # Create the value branch model.
            self.value_layer = SlimFC(
                in_size=self.post_fc_stack.num_outputs,
                out_size=1,
                activation_fn=None,
                initializer=torch_normc_initializer(0.01),
            )
        else:
            self.num_outputs = concat_size
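
The constructor above routes each primitive sub-space of the observation to a CNN, a one-hot stack, or a flatten stack. The following standalone sketch (plain gym + numpy, no RLlib; the example space and the local flatten_space helper are illustrative stand-ins, not the library's own) mirrors that routing decision:

import numpy as np
from gym.spaces import Box, Dict, Discrete, MultiDiscrete, Tuple

example_space = Dict({
    "cam": Box(0.0, 1.0, shape=(42, 42, 3)),   # image -> CNN branch
    "task_id": Discrete(5),                    # -> one-hot branch
    "pose": Box(-1.0, 1.0, shape=(7, )),       # -> flatten branch
})

def flatten_space(space):
    # Depth-first list of primitive sub-spaces (stand-in for RLlib's helper).
    if isinstance(space, Dict):
        return [s for sub in space.spaces.values() for s in flatten_space(sub)]
    if isinstance(space, Tuple):
        return [s for sub in space.spaces for s in flatten_space(sub)]
    return [space]

for i, component in enumerate(flatten_space(example_space)):
    if len(component.shape) == 3:
        print(i, "cnn", component.shape)
    elif isinstance(component, (Discrete, MultiDiscrete)):
        size = component.n if isinstance(component, Discrete) else int(sum(component.nvec))
        print(i, "one_hot", size)
    else:
        print(i, "flatten", int(np.prod(component.shape)))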
Example #4
 def obs_space(self):
     low = np.zeros([self._local_width, self._local_width, self._channel_num])
     high = np.ones([self._local_width, self._local_width, self._channel_num])
     return Box(low, high)
Example #5
 def _build_state_space(self, state_names):
     # Docstring of superclass
     low = -1 * np.ones_like(state_names, dtype=float)
     low[self.U_SUP_IDX] = 0.0
     high = np.ones_like(state_names, dtype=float)
     return Box(low, high)
 def observation_space(self):
     #return Discrete(11)
     return Box(low=0, high=1, shape=(1, ))
Example #7
    def __init__(self,
                 observation_space: gym.spaces.Space,
                 action_space: gym.spaces.Space,
                 num_outputs: Optional[int],
                 model_config: ModelConfigDict,
                 name: str,
                 *,
                 num_transformer_units: int = 1,
                 attention_dim: int = 64,
                 num_heads: int = 2,
                 memory_inference: int = 50,
                 memory_training: int = 50,
                 head_dim: int = 32,
                 position_wise_mlp_dim: int = 32,
                 init_gru_gate_bias: float = 2.0):
        """Initializes a GTrXLNet.

        Args:
            num_transformer_units (int): The number of Transformer repeats to
                use (denoted L in [2]).
            attention_dim (int): The input and output dimensions of one
                Transformer unit.
            num_heads (int): The number of attention heads to use in parallel.
                Denoted as `H` in [3].
            memory_inference (int): The number of timesteps to concat (time
                axis) and feed into the next transformer unit as inference
                input. The first transformer unit will receive this number of
                past observations (plus the current one), instead.
            memory_training (int): The number of timesteps to concat (time
                axis) and feed into the next transformer unit as training
                input (plus the actual input sequence of len=max_seq_len).
                The first transformer unit will receive this number of
                past observations (plus the input sequence), instead.
            head_dim (int): The dimension of a single(!) attention head within
                a multi-head attention unit. Denoted as `d` in [3].
            position_wise_mlp_dim (int): The dimension of the hidden layer
                within the position-wise MLP (after the multi-head attention
                block within one Transformer unit). This is the size of the
                first of the two layers within the PositionwiseFeedforward. The
                second layer always has size=`attention_dim`.
            init_gru_gate_bias (float): Initial bias values for the GRU gates
                (two GRUs per Transformer unit, one after the MHA, one after
                the position-wise MLP).
        """

        super().__init__(observation_space, action_space, num_outputs,
                         model_config, name)

        nn.Module.__init__(self)

        self.num_transformer_units = num_transformer_units
        self.attention_dim = attention_dim
        self.num_heads = num_heads
        self.memory_inference = memory_inference
        self.memory_training = memory_training
        self.head_dim = head_dim
        self.max_seq_len = model_config["max_seq_len"]
        self.obs_dim = observation_space.shape[0]

        self.linear_layer = SlimFC(in_size=self.obs_dim,
                                   out_size=self.attention_dim)

        self.layers = [self.linear_layer]

        attention_layers = []
        # 2) Create L Transformer blocks according to [2].
        for i in range(self.num_transformer_units):
            # RelativeMultiHeadAttention part.
            MHA_layer = SkipConnection(
                RelativeMultiHeadAttention(in_dim=self.attention_dim,
                                           out_dim=self.attention_dim,
                                           num_heads=num_heads,
                                           head_dim=head_dim,
                                           input_layernorm=True,
                                           output_activation=nn.ReLU),
                fan_in_layer=GRUGate(self.attention_dim, init_gru_gate_bias))

            # Position-wise MultiLayerPerceptron part.
            E_layer = SkipConnection(
                nn.Sequential(
                    torch.nn.LayerNorm(self.attention_dim),
                    SlimFC(in_size=self.attention_dim,
                           out_size=position_wise_mlp_dim,
                           use_bias=False,
                           activation_fn=nn.ReLU),
                    SlimFC(in_size=position_wise_mlp_dim,
                           out_size=self.attention_dim,
                           use_bias=False,
                           activation_fn=nn.ReLU),
                ),
                fan_in_layer=GRUGate(self.attention_dim, init_gru_gate_bias),
            )

            # Build a list of all attention layers, in order.
            attention_layers.extend([MHA_layer, E_layer])

        # Create a Sequential such that all parameters inside the attention
        # layers are automatically registered with this top-level model.
        self.attention_layers = nn.Sequential(*attention_layers)
        self.layers.extend(attention_layers)

        # Final layers if num_outputs not None.
        self.logits = None
        self.values_out = None
        # Last value output.
        self._value_out = None
        # Postprocess GTrXL output with another hidden layer.
        if self.num_outputs is not None:
            self.logits = SlimFC(in_size=self.attention_dim,
                                 out_size=self.num_outputs,
                                 activation_fn=nn.ReLU)

            # Value function used by all RLlib Torch RL implementations.
            self.values_out = SlimFC(in_size=self.attention_dim,
                                     out_size=1,
                                     activation_fn=None)
        else:
            self.num_outputs = self.attention_dim

        # Setup trajectory views (`memory-inference` x past memory outs).
        for i in range(self.num_transformer_units):
            space = Box(-1.0, 1.0, shape=(self.attention_dim, ))
            self.view_requirements["state_in_{}".format(i)] = \
                ViewRequirement(
                    "state_out_{}".format(i),
                    shift="-{}:-1".format(self.memory_inference),
                    # Repeat the incoming state every max-seq-len times.
                    batch_repeat_value=self.max_seq_len,
                    space=space)
            self.view_requirements["state_out_{}".format(i)] = \
                ViewRequirement(
                    space=space,
                    used_for_training=False)
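
A rough numpy-only illustration of the trajectory-view setup at the end of the constructor above (not RLlib's actual ViewRequirement machinery): each transformer unit exposes a Box(-1, 1, (attention_dim,)) state, and the shift string selects the last memory_inference state outputs as the next state input:

import numpy as np
from gym.spaces import Box

attention_dim, memory_inference = 64, 50
space = Box(-1.0, 1.0, shape=(attention_dim, ))
shift = "-{}:-1".format(memory_inference)          # -> "-50:-1"

state_outs = np.zeros((200, attention_dim))        # fake per-timestep state_out_0 rows
state_in_window = state_outs[-memory_inference:]   # the rows shift "-50:-1" refers to
print(shift, space.shape, state_in_window.shape)   # -50:-1 (64,) (50, 64)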
Example #8
from supersuit.basic_transforms.flatten import check_param, change_observation
from gym.spaces import Box
import numpy as np
import pytest

test_obs_space = Box(low=np.float32(0.),
                     high=np.float32(1.),
                     shape=(4, 4, 3),
                     dtype=np.float32)
test_obs = np.zeros([4, 4, 3], dtype=np.float64) + np.arange(3)


def test_param_check():
    check_param(test_obs_space, True)
    with pytest.raises(AssertionError):
        check_param(test_obs_space, 6)


def test_change_observation():
    new_obs = change_observation(test_obs, test_obs_space, True)
    assert new_obs.shape == (4 * 4 * 3, )
Example #9
 def __init__(self, env):
     ObservationWrapper.__init__(self, env)
     self.unwrapped.observation_space = Box(-np.inf, np.inf, (7, ))
Example #10
def discrete2box4class(discrete_space):
    return Box(0.0, 1.0, (discrete_space.n, ))
Example #11
 def __init__(self, env, skip=4):
     super(CustomSkipFrame, self).__init__(env)
     self.observation_space = Box(low=0, high=255, shape=(skip, 84, 84))
     self.skip = skip
     self.states = np.zeros((skip, 84, 84), dtype=np.float32)
    def __init__(
            self,
            hand_low=(-0.5, 0.40, 0.05),
            hand_high=(0.5, 1, 0.5),
            obj_low=(0., 0.6, 0.015),
            obj_high=(0., 0.6, 0.015),
            random_init=False,
            # tasks = [{'goal': 0.75,  'obj_init_pos':-0.2, 'obj_init_angle': 0.3}],
            # tasks = [{'goal': np.array([0., 0.75, 0.04]),  'obj_init_pos':np.array([0., 0.9, 0.04]), 'obj_init_angle': 0.3}],
            tasks=[{
                'goal': np.array([0., 0.85, 0.02]),
                'obj_init_pos': np.array([0., 0.6, 0.015]),
                'obj_init_angle': 0.3
            }],
            goal_low=(-0.1, 0.85, 0.02),
            goal_high=(0.1, 0.9, 0.02),
            hand_init_pos=(0, 0.6, 0.2),
            rotMode='fixed',  #'fixed',
            obs_type='plain',
            multitask=False,
            multitask_num=1,
            if_render=False,
            **kwargs):
        self.quick_init(locals())
        SawyerXYZEnv.__init__(self,
                              frame_skip=5,
                              action_scale=1. / 100,
                              hand_low=hand_low,
                              hand_high=hand_high,
                              model_name=self.model_name,
                              **kwargs)
        if obj_low is None:
            obj_low = self.hand_low

        if goal_low is None:
            goal_low = self.hand_low

        if obj_high is None:
            obj_high = self.hand_high

        if goal_high is None:
            goal_high = self.hand_high

        assert obs_type in OBS_TYPE
        if multitask:
            obs_type = 'with_goal_and_id'
        self.obs_type = obs_type

        self.random_init = random_init
        self.max_path_length = 150  #150
        self.tasks = tasks
        self.num_tasks = len(tasks)
        self.rotMode = rotMode
        self.hand_init_pos = np.array(hand_init_pos)
        self.multitask = multitask
        self.multitask_num = multitask_num
        self._state_goal_idx = np.zeros(self.multitask_num)
        self.if_render = if_render
        if rotMode == 'fixed':
            self.action_space = Box(
                np.array([-1, -1, -1, -1]),
                np.array([1, 1, 1, 1]),
            )
        elif rotMode == 'rotz':
            self.action_rot_scale = 1. / 50
            self.action_space = Box(
                np.array([-1, -1, -1, -np.pi, -1]),
                np.array([1, 1, 1, np.pi, 1]),
            )
        elif rotMode == 'quat':
            self.action_space = Box(
                np.array([-1, -1, -1, 0, -1, -1, -1, -1]),
                np.array([1, 1, 1, 2 * np.pi, 1, 1, 1, 1]),
            )
        else:
            self.action_space = Box(
                np.array([-1, -1, -1, -np.pi / 2, -np.pi / 2, 0, -1]),
                np.array([1, 1, 1, np.pi / 2, np.pi / 2, np.pi * 2, 1]),
            )
        self.obj_and_goal_space = Box(
            np.hstack((obj_low, goal_low)),
            np.hstack((obj_high, goal_high)),
        )
        self.goal_space = Box(np.array(goal_low), np.array(goal_high))
        if not multitask and self.obs_type == 'with_goal_id':
            self.observation_space = Box(
                np.hstack(
                    (self.hand_low, obj_low, goal_low, np.zeros(len(tasks)))),
                np.hstack((self.hand_high, obj_high, goal_high,
                           np.ones(len(tasks)))),
            )
        elif not multitask and self.obs_type == 'plain':
            self.observation_space = Box(
                np.hstack((self.hand_low, obj_low)),
                np.hstack((self.hand_high, obj_high)),
            )
        elif not multitask and self.obs_type == 'with_goal':
            self.observation_space = Box(
                np.hstack((self.hand_low, obj_low, goal_low)),
                np.hstack((self.hand_high, obj_high, goal_high)),
            )
        else:
            self.observation_space = Box(
                np.hstack((self.hand_low, obj_low, goal_low,
                           np.zeros(multitask_num))),
                np.hstack((self.hand_high, obj_high, goal_high,
                           np.zeros(multitask_num))),
            )

        self.reset()
Example #13
    def __init__(self,
                 seed=None,
                 spawn_rate=20,
                 num_archers=2,
                 num_knights=2,
                 killable_knights=True,
                 killable_archers=True,
                 pad_observation=True,
                 black_death=True,
                 line_death=False,
                 max_frames=900):
        # Game Constants
        self.ZOMBIE_SPAWN = spawn_rate
        self.FPS = 90
        self.WIDTH = 1280
        self.HEIGHT = 720
        self.max_frames = max_frames
        self.frames = 0
        self.pad_observation = pad_observation
        self.killable_knights = killable_knights
        self.killable_archers = killable_archers
        self.black_death = black_death
        self.line_death = line_death
        self.has_reset = False
        self.np_random, seed = seeding.np_random(seed)

        # Dictionaries for holding new players and their weapons
        self.archer_dict = {}
        self.knight_dict = {}
        self.arrow_dict = {}
        self.sword_dict = {}

        # Game Variables
        self.frame_count = 0
        self.score = 0
        self.run = True
        self.arrow_spawn_rate = self.sword_spawn_rate = self.zombie_spawn_rate = 0
        self.knight_player_num = self.archer_player_num = 0
        self.archer_killed = False
        self.knight_killed = False
        self.sword_killed = False
        self.closed = False

        # Creating Sprite Groups
        self.all_sprites = pygame.sprite.Group()
        self.zombie_list = pygame.sprite.Group()
        self.arrow_list = pygame.sprite.Group()
        self.sword_list = pygame.sprite.Group()
        self.archer_list = pygame.sprite.Group()
        self.knight_list = pygame.sprite.Group()

        # If black_death, this represents agents to remove at end of cycle
        self.kill_list = []

        self.num_archers = num_archers
        self.num_knights = num_knights

        # Initializing Pygame
        self.render_on = False
        pygame.init()
        # self.WINDOW = pygame.display.set_mode([self.WIDTH, self.HEIGHT])
        self.WINDOW = pygame.Surface((self.WIDTH, self.HEIGHT))
        pygame.display.set_caption("Knights, Archers, Zombies")
        self.clock = pygame.time.Clock()
        self.left_wall = get_image(os.path.join('img', 'left_wall.png'))
        self.right_wall = get_image(os.path.join('img', 'right_wall.png'))
        self.right_wall_rect = self.right_wall.get_rect()
        self.right_wall_rect.left = self.WIDTH - self.right_wall_rect.width
        self.floor_patch1 = get_image(os.path.join('img', 'patch1.png'))
        self.floor_patch2 = get_image(os.path.join('img', 'patch2.png'))
        self.floor_patch3 = get_image(os.path.join('img', 'patch3.png'))
        self.floor_patch4 = get_image(os.path.join('img', 'patch4.png'))

        self.agent_list = []
        self.agents = []

        for i in range(num_archers):
            name = "archer_" + str(i)
            self.archer_dict["archer{0}".format(
                self.archer_player_num)] = Archer(agent_name=name)
            self.archer_dict["archer{0}".format(
                self.archer_player_num)].offset(i * 50, 0)
            self.archer_list.add(self.archer_dict["archer{0}".format(
                self.archer_player_num)])
            self.all_sprites.add(self.archer_dict["archer{0}".format(
                self.archer_player_num)])
            self.agent_list.append(self.archer_dict["archer{0}".format(
                self.archer_player_num)])
            if i != num_archers - 1:
                self.archer_player_num += 1

        for i in range(num_knights):
            name = "knight_" + str(i)
            self.knight_dict["knight{0}".format(
                self.knight_player_num)] = Knight(agent_name=name)
            self.knight_dict["knight{0}".format(
                self.knight_player_num)].offset(i * 50, 0)
            self.knight_list.add(self.knight_dict["knight{0}".format(
                self.knight_player_num)])
            self.all_sprites.add(self.knight_dict["knight{0}".format(
                self.knight_player_num)])
            self.agent_list.append(self.knight_dict["knight{0}".format(
                self.knight_player_num)])
            if i != num_knights - 1:
                self.knight_player_num += 1

        self.agent_name_mapping = {}
        a_count = 0
        for i in range(num_archers):
            a_name = "archer_" + str(i)
            self.agents.append(a_name)
            self.agent_name_mapping[a_name] = a_count
            a_count += 1
        for i in range(num_knights):
            k_name = "knight_" + str(i)
            self.agents.append(k_name)
            self.agent_name_mapping[k_name] = a_count
            a_count += 1

        self.observation_spaces = dict(
            zip(self.agents, [
                Box(low=0, high=255, shape=(512, 512, 3), dtype=np.uint8)
                for _ in enumerate(self.agents)
            ]))
        self.action_spaces = dict(
            zip(self.agents, [Discrete(6) for _ in enumerate(self.agents)]))
        self.display_wait = 0.0

        self._agent_selector = agent_selector(self.agents)
        self.num_agents = len(self.agents)
        self.reinit()
Example #14
    def __init__(self):
        """
        Initialize agent.
        Args:
            hyperparams: Dictionary of hyperparameters.
            init_node: Whether or not to initialize a new ROS node.
        """
        rospy.init_node('baxter_reacher')
        self.dt = BAXTER['dt']
        self.RATE = 1. / self.dt
        self.using_torque = BAXTER['using_torque']
        self.using_left_arm = BAXTER['using_left_arm']

        # init baxter interface
        self._llimb = baxter_interface.Limb("left")
        self._rlimb = baxter_interface.Limb("right")
        self.kin_left = baxter_kinematics('left')
        self.kin_right = baxter_kinematics("right")

        print("Getting robot state... ")
        self._rs = baxter_interface.RobotEnable(CHECK_VERSION)
        self._init_state = self._rs.state().enabled
        print("Enabling robot... ")
        self._rs.enable()
        self._pub_rate = rospy.Publisher('robot/joint_state_publish_rate',
                                         UInt16,
                                         queue_size=10)
        self._pub_rate.publish(self.RATE)
        self.rate = rospy.Rate(self.RATE)

        self._max_time_step = 50
        self._step_counter = 0

        self.action_norm_factor = REAL_ACT_SPACE_HIGH

        # to control the orientation, we set 3 points on the gripper frame
        self.ee_points = np.array([[0., -0.04, 0.07], [0., 0.04, 0.07],
                                   [0., 0., 0.]])

        self.observation_space = Box(-np.inf * np.ones(10),
                                     np.inf * np.ones(10))
        self.action_space = Box(-np.ones(7, dtype=np.float32),
                                np.ones(7, dtype=np.float32))

        self.indicator_flag = False

        self.num_envs = 1
        self.clipob = 10
        self.cliprew = 10

        self.ob_rms = RunningMeanStd(shape=self.observation_space.shape)
        self.ret_rms = RunningMeanStd(shape=())
        self.ret = np.zeros(1)
        self.eprew = np.zeros(1)
        self.gamma = BAXTER["gamma"]
        self.epsilon = BAXTER["epsilon"]
        self._done = np.array(False).repeat(self.eprew.shape)
        self.prev_dones = np.array(False).repeat(self.eprew.shape)
        self._reward = np.zeros(1, dtype=np.float32)
        self._obs = np.zeros((1, ) + self.observation_space.shape,
                             dtype=np.float32)
Example #15
class ContB6BridgeConverter(ContDynamicallyAveragedConverter):
    """
    The continuous B6 bridge converter (B6C) is simulated with three continuous 2QC.

    Key:
        'Cont-B6C'

    Actions:
        The Duty Cycle for each half brigde in the range of (-1,1)

    Action Space:
        Box(-1, 1, shape=(3,))

    Output Voltages and Currents:
        | voltages: (-1,1)
        | currents: (-1,1)

    Output Voltage Space:
        Box(-0.5, 0.5, shape=(3,))
    """

    action_space = Box(-1, 1, shape=(3,))
    # Only positive voltages can be applied
    voltages = (-1, 1)
    # Positive and negative currents are possible
    currents = (-1, 1)

    _reset_action = [0, 0, 0]

    def __init__(self, tau=1e-4, **kwargs):
        # Docstring in base class
        super().__init__(tau=tau, **kwargs)
        self._subconverters = [
            ContTwoQuadrantConverter(tau=tau, **kwargs),
            ContTwoQuadrantConverter(tau=tau, **kwargs),
            ContTwoQuadrantConverter(tau=tau, **kwargs),
        ]

    def reset(self):
        # Docstring in base class
        return [
            self._subconverters[0].reset()[0] - 0.5,
            self._subconverters[1].reset()[0] - 0.5,
            self._subconverters[2].reset()[0] - 0.5,
        ]

    def convert(self, i_out, t):
        # Docstring in base class
        u_out = [
            self._subconverters[0].convert([i_out[0]], t)[0] - 0.5,
            self._subconverters[1].convert([i_out[1]], t)[0] - 0.5,
            self._subconverters[2].convert([i_out[2]], t)[0] - 0.5
        ]
        return u_out

    def set_action(self, action, t):
        # Docstring in base class
        times = []
        times += self._subconverters[0].set_action([0.5 * (action[0] + 1)], t)
        times += self._subconverters[1].set_action([0.5 * (action[1] + 1)], t)
        times += self._subconverters[2].set_action([0.5 * (action[2] + 1)], t)
        return sorted(list(set(times)))

    def _convert(self, i_in, t):
        # Not used
        pass

    def i_sup(self, i_out):
        # Docstring in base class
        return sum([subconverter.i_sup([i_out_]) for subconverter, i_out_ in zip(self._subconverters, i_out)])
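
A small numpy check of the affine mappings used by the converter above (idealized: it treats each two-quadrant subconverter as passing its duty cycle straight through): set_action rescales each duty cycle from (-1, 1) to (0, 1), and convert shifts each subconverter output down by 0.5:

import numpy as np

action = np.array([-1.0, 0.0, 1.0])        # one duty cycle per half bridge
subconverter_duty = 0.5 * (action + 1.0)   # -> [0.0, 0.5, 1.0], as in set_action()
subconverter_out = subconverter_duty       # idealized 2QC output in (0, 1)
u_out = subconverter_out - 0.5             # -> [-0.5, 0.0, 0.5], as in convert()
print(subconverter_duty, u_out)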
Example #16
 def __init__(self, env):
     super().__init__(env)
     self.n = None
     if isinstance(self.env.observation_space, Discrete):
         self.n = self.env.observation_space.n
         self.observation_space = Box(0, 1, (self.n, ))
 def action_space(self):
     return Box(low=0, high=1, shape=(1, ))
 def observation_space(self):
     return Box(low=0, high=1.0, shape=(25, ), dtype=np.float32)
Example #19
    def __init__(
        self,
        input_space: gym.spaces.Space,
        action_space: gym.spaces.Space,
        *,
        name: str,
        max_seq_len: int = 20,
        num_transformer_units: int = 1,
        attention_dim: int = 64,
        num_heads: int = 2,
        memory_inference: int = 50,
        memory_training: int = 50,
        head_dim: int = 32,
        position_wise_mlp_dim: int = 32,
        init_gru_gate_bias: float = 2.0,
    ):
        """Initializes a GTrXLNet instance.

        Args:
            num_transformer_units (int): The number of Transformer repeats to
                use (denoted L in [2]).
            attention_dim (int): The input and output dimensions of one
                Transformer unit.
            num_heads (int): The number of attention heads to use in parallel.
                Denoted as `H` in [3].
            memory_inference (int): The number of timesteps to concat (time
                axis) and feed into the next transformer unit as inference
                input. The first transformer unit will receive this number of
                past observations (plus the current one), instead.
            memory_training (int): The number of timesteps to concat (time
                axis) and feed into the next transformer unit as training
                input (plus the actual input sequence of len=max_seq_len).
                The first transformer unit will receive this number of
                past observations (plus the input sequence), instead.
            head_dim (int): The dimension of a single(!) attention head within
                a multi-head attention unit. Denoted as `d` in [3].
            position_wise_mlp_dim (int): The dimension of the hidden layer
                within the position-wise MLP (after the multi-head attention
                block within one Transformer unit). This is the size of the
                first of the two layers within the PositionwiseFeedforward. The
                second layer always has size=`attention_dim`.
            init_gru_gate_bias (float): Initial bias values for the GRU gates
                (two GRUs per Transformer unit, one after the MHA, one after
                the position-wise MLP).
        """

        super().__init__(name=name)

        self.num_transformer_units = num_transformer_units
        self.attention_dim = attention_dim
        self.num_heads = num_heads
        self.memory_inference = memory_inference
        self.memory_training = memory_training
        self.head_dim = head_dim
        self.max_seq_len = max_seq_len
        self.obs_dim = input_space.shape[0]

        # Raw observation input (plus (None) time axis).
        input_layer = tf.keras.layers.Input(
            shape=(
                None,
                self.obs_dim,
            ),
            name="inputs",
        )
        memory_ins = [
            tf.keras.layers.Input(
                shape=(
                    None,
                    self.attention_dim,
                ),
                dtype=tf.float32,
                name="memory_in_{}".format(i),
            ) for i in range(self.num_transformer_units)
        ]

        # Map observation dim to input/output transformer (attention) dim.
        E_out = tf.keras.layers.Dense(self.attention_dim)(input_layer)
        # Output, collected and concat'd to build the internal, tau-len
        # Memory units used for additional contextual information.
        memory_outs = [E_out]

        # 2) Create L Transformer blocks according to [2].
        for i in range(self.num_transformer_units):
            # RelativeMultiHeadAttention part.
            MHA_out = SkipConnection(
                RelativeMultiHeadAttention(
                    out_dim=self.attention_dim,
                    num_heads=num_heads,
                    head_dim=head_dim,
                    input_layernorm=True,
                    output_activation=tf.nn.relu,
                ),
                fan_in_layer=GRUGate(init_gru_gate_bias),
                name="mha_{}".format(i + 1),
            )(E_out, memory=memory_ins[i])
            # Position-wise MLP part.
            E_out = SkipConnection(
                tf.keras.Sequential((
                    tf.keras.layers.LayerNormalization(axis=-1),
                    PositionwiseFeedforward(
                        out_dim=self.attention_dim,
                        hidden_dim=position_wise_mlp_dim,
                        output_activation=tf.nn.relu,
                    ),
                )),
                fan_in_layer=GRUGate(init_gru_gate_bias),
                name="pos_wise_mlp_{}".format(i + 1),
            )(MHA_out)
            # Output of position-wise MLP == E(l-1), which is concat'd
            # to the current Mem block (M(l-1)) to yield E~(l-1), which is then
            # used by the next transformer block.
            memory_outs.append(E_out)

        self._logits = None
        self._value_out = None

        self.trxl_model = tf.keras.Model(inputs=[input_layer] + memory_ins,
                                         outputs=[E_out] + memory_outs[:-1])

        self.view_requirements = {
            SampleBatch.OBS: ViewRequirement(space=input_space),
        }
        # Setup trajectory views (`memory-inference` x past memory outs).
        for i in range(self.num_transformer_units):
            space = Box(-1.0, 1.0, shape=(self.attention_dim, ))
            self.view_requirements["state_in_{}".format(i)] = ViewRequirement(
                "state_out_{}".format(i),
                shift="-{}:-1".format(self.memory_inference),
                # Repeat the incoming state every max-seq-len times.
                batch_repeat_value=self.max_seq_len,
                space=space,
            )
            self.view_requirements["state_out_{}".format(i)] = ViewRequirement(
                space=space, used_for_training=False)
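
A shape-only sketch (numpy stand-ins, no TensorFlow; the batch/time sizes are made up) of what the Keras model above consumes: one [batch, time, obs_dim] observation tensor plus one [batch, memory, attention_dim] memory tensor per transformer unit, with trxl_model returning the transformer output plus one updated memory tensor per unit for the next call:

import numpy as np

batch, time, obs_dim = 2, 20, 8                        # hypothetical sizes
num_transformer_units, attention_dim, memory = 1, 64, 50

obs_in = np.zeros((batch, time, obs_dim))
memory_ins = [np.zeros((batch, memory, attention_dim))
              for _ in range(num_transformer_units)]
print(obs_in.shape, [m.shape for m in memory_ins])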
Example #20
File: deed.py  Project: mathieu-reymond/gym
    def __init__(self):
        super(DEED, self).__init__()

        self.u_holder = np.array([
            [
                150, 470, 786.7988, 38.5397, 0.1524, 450, 0.041, 103.3908,
                -2.4444, 0.0312, 0.5035, 0.0207, 80, 80
            ],
            [
                135, 470, 451.3251, 46.1591, 0.1058, 600, 0.036, 103.3908,
                -2.4444, 0.0312, 0.5035, 0.0207, 80, 80
            ],
            [
                73, 340, 1049.9977, 40.3965, 0.0280, 320, 0.038, 300.3910,
                -4.0695, 0.0509, 0.4968, 0.0202, 80, 80
            ],
            [
                60, 300, 1243.5311, 38.3055, 0.0354, 260, 0.052, 300.3910,
                -4.0695, 0.0509, 0.4968, 0.0202, 50, 50
            ],
            [
                73, 243, 1658.5696, 36.3278, 0.0211, 280, 0.063, 320.0006,
                -3.8132, 0.0344, 0.4972, 0.0200, 50, 50
            ],
            [
                57, 160, 1356.6592, 38.2704, 0.0179, 310, 0.048, 320.0006,
                -3.8132, 0.0344, 0.4972, 0.0200, 50, 50
            ],
            [
                20, 130, 1450.7045, 36.5104, 0.0121, 300, 0.086, 330.0056,
                -3.9023, 0.0465, 0.5163, 0.0214, 30, 30
            ],
            [
                47, 120, 1450.7045, 36.5104, 0.0121, 340, 0.082, 330.0056,
                -3.9023, 0.0465, 0.5163, 0.0214, 30, 30
            ],
            [
                20, 80, 1455.6056, 39.5804, 0.1090, 270, 0.098, 350.0056,
                -3.9524, 0.0465, 0.5475, 0.0234, 30, 30
            ],
            [
                10, 55, 1469.4026, 40.5407, 0.1295, 380, 0.094, 360.0012,
                -3.9864, 0.0470, 0.5475, 0.0234, 30, 30
            ],
        ])

        self.pdm_hold = np.array([
            1036, 1110, 1258, 1406, 1480, 1628, 1702, 1776, 1924, 2022, 2106,
            2150, 2072, 1924, 1776, 1554, 1480, 1628, 1776, 1972, 1924, 1628,
            1332, 1184
        ])

        self.b = np.array([
            [
                0.000049, 0.000015, 0.000015, 0.000015, 0.000016, 0.000017,
                0.000017, 0.000018, 0.000019, 0.000020
            ],
            [
                0.000014, 0.000045, 0.000016, 0.000016, 0.000017, 0.000015,
                0.000015, 0.000016, 0.000018, 0.000018
            ],
            [
                0.000015, 0.000016, 0.000039, 0.000010, 0.000012, 0.000012,
                0.000014, 0.000014, 0.000016, 0.000016
            ],
            [
                0.000015, 0.000016, 0.000010, 0.000040, 0.000014, 0.000010,
                0.000011, 0.000012, 0.000014, 0.000015
            ],
            [
                0.000016, 0.000017, 0.000012, 0.000014, 0.000035, 0.000011,
                0.000013, 0.000013, 0.000015, 0.000016
            ],
            [
                0.000017, 0.000015, 0.000012, 0.000010, 0.000011, 0.000036,
                0.000012, 0.000012, 0.000014, 0.000015
            ],
            [
                0.000017, 0.000015, 0.000014, 0.000011, 0.000013, 0.000012,
                0.000038, 0.000016, 0.000016, 0.000018
            ],
            [
                0.000018, 0.000016, 0.000014, 0.000012, 0.000013, 0.000012,
                0.000016, 0.000040, 0.000015, 0.000016
            ],
            [
                0.000019, 0.000018, 0.000016, 0.000014, 0.000015, 0.000014,
                0.000016, 0.000015, 0.000042, 0.000019
            ],
            [
                0.000020, 0.000018, 0.000016, 0.000014, 0.000016, 0.000015,
                0.000018, 0.000016, 0.000019, 0.000044
            ],
        ])

        self.violation_multiplier = 1e6  # constant
        self.n_actions = 101
        # [Actions, Agents] transpose -> [Agents, Actions]
        self.action_powers = np.linspace(self.u_holder[:, 0],
                                         self.u_holder[:, 1], self.n_actions).T

        self.action_space = MultiDiscrete(
            np.ones(len(self.u_holder) - 1) * self.n_actions)
        self.observation_space = MultiDiscrete(
            np.ones((len(self.u_holder) - 1, 2)) * 20)
        self.reward_space = Box(-np.ones(3) * np.inf, np.zeros(3))
    def __init__(
            self,
            hand_low=(-0.5, 0.40, 0.05),
            hand_high=(0.5, 1, 0.5),
            obj_low=(-0.1, 0.5, 0.02),
            obj_high=(0.1, 0.6, 0.02),
            random_init=False,
            tasks=[{
                'stick_init_pos': np.array([0., 0.6, 0.02])
            }],
            goal_low=None,
            goal_high=None,
            hand_init_pos=(0, 0.6, 0.2),
            liftThresh=0.04,
            rotMode='fixed',  #'fixed',
            rewMode='orig',
            obs_type='plain',
            multitask=False,
            multitask_num=1,
            if_render=False,
            **kwargs):
        self.quick_init(locals())
        SawyerXYZEnv.__init__(self,
                              frame_skip=5,
                              action_scale=1. / 100,
                              hand_low=hand_low,
                              hand_high=hand_high,
                              model_name=self.model_name,
                              **kwargs)
        assert obs_type in OBS_TYPE
        if multitask:
            obs_type = 'with_goal_and_id'
        self.obs_type = obs_type
        if obj_low is None:
            obj_low = self.hand_low

        if goal_low is None:
            goal_low = self.hand_low

        if obj_high is None:
            obj_high = self.hand_high

        if goal_high is None:
            goal_high = self.hand_high

        self.random_init = random_init
        self.liftThresh = liftThresh
        self.max_path_length = 150  #150
        self.tasks = tasks
        self.num_tasks = len(tasks)
        self.rewMode = rewMode
        self.rotMode = rotMode
        self.hand_init_pos = np.array(hand_init_pos)
        self.multitask = multitask
        self.multitask_num = multitask_num
        self.if_render = if_render
        self._state_goal_idx = np.zeros(self.multitask_num)
        if rotMode == 'fixed':
            self.action_space = Box(
                np.array([-1, -1, -1, -1]),
                np.array([1, 1, 1, 1]),
            )
        elif rotMode == 'rotz':
            self.action_rot_scale = 1. / 50
            self.action_space = Box(
                np.array([-1, -1, -1, -np.pi, -1]),
                np.array([1, 1, 1, np.pi, 1]),
            )
        elif rotMode == 'quat':
            self.action_space = Box(
                np.array([-1, -1, -1, 0, -1, -1, -1, -1]),
                np.array([1, 1, 1, 2 * np.pi, 1, 1, 1, 1]),
            )
        else:
            self.action_space = Box(
                np.array([-1, -1, -1, -np.pi / 2, -np.pi / 2, 0, -1]),
                np.array([1, 1, 1, np.pi / 2, np.pi / 2, np.pi * 2, 1]),
            )

        # For now, fix the object initial position.
        self.obj_init_pos = np.array([0.2, 0.6, 0.04])
        self.obj_init_qpos = np.array([0.0, 0.0])
        self.obj_space = Box(np.array(obj_low), np.array(obj_high))
        self.goal_space = Box(np.array(goal_low)[:2], np.array(goal_high)[:2])
        self.obs_type = obs_type
        if not multitask and self.obs_type == 'with_goal_id':
            self.observation_space = Box(
                np.hstack((self.hand_low, obj_low, np.zeros(len(tasks)))),
                np.hstack((self.hand_high, obj_high, np.ones(len(tasks)))),
            )
        elif not multitask and self.obs_type == 'plain':
            self.observation_space = Box(
                np.hstack((self.hand_low, obj_low, obj_low)),
                np.hstack((self.hand_high, obj_high, obj_high)),
            )
        elif not multitask and self.obs_type == 'with_goal':
            self.observation_space = Box(
                np.hstack((self.hand_low, obj_low, goal_low)),
                np.hstack((self.hand_high, obj_high, goal_high)),
            )
        else:
            self.observation_space = Box(
                np.hstack((self.hand_low, obj_low, goal_low,
                           np.zeros(multitask_num))),
                np.hstack((self.hand_high, obj_high, goal_high,
                           np.zeros(multitask_num))),
            )
        self.reset()
Example #22
    def get_policy_configs_for_game(
            game_name: str) -> Tuple[dict, Callable[[AgentID], PolicyID]]:

        # The RLlib server must know about the Spaces that the Client will be
        # using inside Unity3D, up-front.
        obs_spaces = {
            # 3DBall.
            "3DBall":
            Box(float("-inf"), float("inf"), (8, )),
            # 3DBallHard.
            "3DBallHard":
            Box(float("-inf"), float("inf"), (45, )),
            # Pyramids.
            "Pyramids":
            TupleSpace([
                Box(float("-inf"), float("inf"), (56, )),
                Box(float("-inf"), float("inf"), (56, )),
                Box(float("-inf"), float("inf"), (56, )),
                Box(float("-inf"), float("inf"), (4, )),
            ]),
            # SoccerStrikersVsGoalie.
            "Goalie":
            Box(float("-inf"), float("inf"), (738, )),
            "Striker":
            TupleSpace([
                Box(float("-inf"), float("inf"), (231, )),
                Box(float("-inf"), float("inf"), (63, )),
            ]),
            # Tennis.
            "Tennis":
            Box(float("-inf"), float("inf"), (27, )),
            # VisualHallway.
            "VisualHallway":
            Box(float("-inf"), float("inf"), (84, 84, 3)),
            # Walker.
            "Walker":
            Box(float("-inf"), float("inf"), (212, )),
            # FoodCollector.
            "FoodCollector":
            TupleSpace([
                Box(float("-inf"), float("inf"), (49, )),
                Box(float("-inf"), float("inf"), (4, )),
            ]),
        }
        action_spaces = {
            # 3DBall.
            "3DBall": Box(float("-inf"), float("inf"), (2, ),
                          dtype=np.float32),
            # 3DBallHard.
            "3DBallHard": Box(float("-inf"),
                              float("inf"), (2, ),
                              dtype=np.float32),
            # Pyramids.
            "Pyramids": MultiDiscrete([5]),
            # SoccerStrikersVsGoalie.
            "Goalie": MultiDiscrete([3, 3, 3]),
            "Striker": MultiDiscrete([3, 3, 3]),
            # Tennis.
            "Tennis": Box(float("-inf"), float("inf"), (3, )),
            # VisualHallway.
            "VisualHallway": MultiDiscrete([5]),
            # Walker.
            "Walker": Box(float("-inf"), float("inf"), (39, )),
            # FoodCollector.
            "FoodCollector": MultiDiscrete([3, 3, 3, 2]),
        }

        # Policies (Unity: "behaviors") and agent-to-policy mapping fns.
        if game_name == "SoccerStrikersVsGoalie":
            policies = {
                "Goalie":
                (None, obs_spaces["Goalie"], action_spaces["Goalie"], {}),
                "Striker":
                (None, obs_spaces["Striker"], action_spaces["Striker"], {}),
            }

            def policy_mapping_fn(agent_id):
                return "Striker" if "Striker" in agent_id else "Goalie"

        else:
            policies = {
                game_name:
                (None, obs_spaces[game_name], action_spaces[game_name], {}),
            }

            def policy_mapping_fn(agent_id):
                return game_name

        return policies, policy_mapping_fn
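
A hypothetical usage sketch for the helper above (in RLlib it is exposed as a static method on the Unity3D env wrapper; the exact trainer/config wiring may differ between versions):

# Build the multi-agent pieces for one of the supported games.
policies, policy_mapping_fn = get_policy_configs_for_game("SoccerStrikersVsGoalie")

config = {
    "multiagent": {
        "policies": policies,
        "policy_mapping_fn": policy_mapping_fn,
    },
}
print(sorted(policies), policy_mapping_fn("Striker?team=0"))  # -> ['Goalie', 'Striker'] Striker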
Example #23
 def __init__(self, obs_space, bricks_rows=3, bricks_cols=3):
     output_space = Box(low=0,
                        high=1,
                        shape=(bricks_cols, bricks_rows),
                        dtype=np.uint8)
     super().__init__(obs_space, output_space)
Example #24
    def test_get_single_step_input_dict_batch_repeat_value_1(self):
        """Test whether a SampleBatch produces the correct 1-step input dict."""
        space = Box(-1.0, 1.0, ())

        # With batch-repeat-value==1: state_in_0 is built each timestep.
        view_reqs = {
            "state_in_0":
            ViewRequirement(
                data_col="state_out_0",
                shift="-5:-1",
                space=space,
                batch_repeat_value=1,
            ),
            "state_out_0":
            ViewRequirement(space=space, used_for_compute_actions=False),
        }

        # Trajectory of 1 ts (0) (we would like to compute the 1st).
        batch = SampleBatch({
            "state_in_0": np.array([
                [0, 0, 0, 0, 0],  # ts=0
            ]),
            "state_out_0": np.array([1]),
        })
        input_dict = batch.get_single_step_input_dict(
            view_requirements=view_reqs, index="last")
        check(
            input_dict,
            {
                "state_in_0": [[0, 0, 0, 0, 1]],  # ts=1
                "seq_lens": [1],
            },
        )

        # Trajectory of 6 ts (0-5) (we would like to compute the 6th).
        batch = SampleBatch({
            "state_in_0":
            np.array([
                [0, 0, 0, 0, 0],  # ts=0
                [0, 0, 0, 0, 1],  # ts=1
                [0, 0, 0, 1, 2],  # ts=2
                [0, 0, 1, 2, 3],  # ts=3
                [0, 1, 2, 3, 4],  # ts=4
                [1, 2, 3, 4, 5],  # ts=5
            ]),
            "state_out_0":
            np.array([1, 2, 3, 4, 5, 6]),
        })
        input_dict = batch.get_single_step_input_dict(
            view_requirements=view_reqs, index="last")
        check(
            input_dict,
            {
                "state_in_0": [[2, 3, 4, 5, 6]],  # ts=6
                "seq_lens": [1],
            },
        )

        # Trajectory of 12 ts (0-11) (we would like to compute the 12th).
        batch = SampleBatch({
            "state_in_0":
            np.array([
                [0, 0, 0, 0, 0],  # ts=0
                [0, 0, 0, 0, 1],  # ts=1
                [0, 0, 0, 1, 2],  # ts=2
                [0, 0, 1, 2, 3],  # ts=3
                [0, 1, 2, 3, 4],  # ts=4
                [1, 2, 3, 4, 5],  # ts=5
                [2, 3, 4, 5, 6],  # ts=6
                [3, 4, 5, 6, 7],  # ts=7
                [4, 5, 6, 7, 8],  # ts=8
                [5, 6, 7, 8, 9],  # ts=9
                [6, 7, 8, 9, 10],  # ts=10
                [7, 8, 9, 10, 11],  # ts=11
            ]),
            "state_out_0":
            np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]),
        })
        input_dict = batch.get_single_step_input_dict(
            view_requirements=view_reqs, index="last")
        check(
            input_dict,
            {
                "state_in_0": [[8, 9, 10, 11, 12]],  # ts=12
                "seq_lens": [1],
            },
        )
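
A quick numpy check of the middle case above: with shift "-5:-1" and batch_repeat_value=1, the next state_in_0 row is the previous row shifted left by one timestep with the newest state_out_0 appended:

import numpy as np

state_in_last = np.array([1, 2, 3, 4, 5])    # state_in_0 at ts=5
state_out_last = 6                           # state_out_0 at ts=5
next_state_in = np.append(state_in_last[1:], state_out_last)
print(next_state_in)                         # [2 3 4 5 6], matching the check() above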
Example #25
    MultiAgentMountainCar
from ray.rllib.examples.env.random_env import RandomEnv
from ray.rllib.models.tf.fcnet import FullyConnectedNetwork as FCNetV2
from ray.rllib.models.tf.visionnet import VisionNetwork as VisionNetV2
from ray.rllib.models.torch.visionnet import VisionNetwork as TorchVisionNetV2
from ray.rllib.models.torch.fcnet import FullyConnectedNetwork as TorchFCNetV2
from ray.rllib.utils.error import UnsupportedSpaceException
from ray.tune.registry import register_env

tf = try_import_tf()

ACTION_SPACES_TO_TEST = {
    "discrete":
    Discrete(5),
    "vector":
    Box(-1.0, 1.0, (5, ), dtype=np.float32),
    "vector2":
    Box(-1.0, 1.0, (
        5,
        5,
    ), dtype=np.float32),
    "multidiscrete":
    MultiDiscrete([1, 2, 3, 4]),
    "tuple":
    Tuple([Discrete(2),
           Discrete(3),
           Box(-1.0, 1.0, (5, ), dtype=np.float32)]),
    "dict":
    Dict({
        "action_choice":
        Discrete(3),
Example #26
def test_predict_target():
    '''Test predict_target method of Critic.'''
    policy_module = Policy(Box(0, 1, shape=(4, )), Box(0, 1, shape=(2, )))
    observation = tf.placeholder(tf.float32, shape=(None, 4))
    predict_target_op = policy_module.predict_target(observation)
    assert predict_target_op.shape.as_list() == [None, 2]
Example #27
 def __init__(self, config):
     self.zeros = np.zeros((84, 84, 4))
     self.action_space = Discrete(2)
     self.observation_space = Box(0.0, 1.0, shape=(84, 84, 4), dtype=np.float32)
     self.i = 0
Example #28
    def __init__(
            self,
            reward_info=None,
            frame_skip=50,
            pos_action_scale=2. / 100,
            randomize_goals=True,
            hide_goal=False,
            init_block_low=(-0.05, 0.55),
            init_block_high=(0.05, 0.65),
            puck_goal_low=(-0.05, 0.55),
            puck_goal_high=(0.05, 0.65),
            hand_goal_low=(-0.05, 0.55),
            hand_goal_high=(0.05, 0.65),
            fixed_puck_goal=(0.05, 0.6),
            fixed_hand_goal=(-0.05, 0.6),
            mocap_low=(-0.1, 0.5, 0.0),
            mocap_high=(0.1, 0.7, 0.5),
            force_puck_in_goal_space=False,
    ):
        self.quick_init(locals())
        self.reward_info = reward_info
        self.randomize_goals = randomize_goals
        self._pos_action_scale = pos_action_scale
        self.hide_goal = hide_goal

        self.init_block_low = np.array(init_block_low)
        self.init_block_high = np.array(init_block_high)
        self.puck_goal_low = np.array(puck_goal_low)
        self.puck_goal_high = np.array(puck_goal_high)
        self.hand_goal_low = np.array(hand_goal_low)
        self.hand_goal_high = np.array(hand_goal_high)
        self.fixed_puck_goal = np.array(fixed_puck_goal)
        self.fixed_hand_goal = np.array(fixed_hand_goal)
        self.mocap_low = np.array(mocap_low)
        self.mocap_high = np.array(mocap_high)
        self.force_puck_in_goal_space = force_puck_in_goal_space

        self._goal_xyxy = self.sample_goal_xyxy()
        # MultitaskEnv.__init__(self, distance_metric_order=2)
        MujocoEnv.__init__(self, self.model_name, frame_skip=frame_skip)

        self.action_space = Box(
            np.array([-1, -1]),
            np.array([1, 1]),
        )
        self.obs_box = Box(
            np.array([-0.2, 0.5, -0.2, 0.5]),
            np.array([0.2, 0.7, 0.2, 0.7]),
        )
        goal_low = np.concatenate((self.hand_goal_low, self.puck_goal_low))
        goal_high = np.concatenate((self.hand_goal_high, self.puck_goal_high))
        self.goal_box = Box(
            goal_low,
            goal_high,
        )
        self.observation_space = Dict([
            ('observation', self.obs_box),
            ('state_observation', self.obs_box),
            ('desired_goal', self.goal_box),
            ('state_desired_goal', self.goal_box),
            ('achieved_goal', self.goal_box),
            ('state_achieved_goal', self.goal_box),
        ])
        # hack for state-based experiments for other envs
        # self.observation_space = Box(
        #     np.array([-0.2, 0.5, -0.2, 0.5, -0.2, 0.5]),
        #     np.array([0.2, 0.7, 0.2, 0.7, 0.2, 0.7]),
        # )
        # self.goal_space = Box(
        #     np.array([-0.2, 0.5, -0.2, 0.5, -0.2, 0.5]),
        #     np.array([0.2, 0.7, 0.2, 0.7, 0.2, 0.7]),
        # )

        self.reset()
        self.reset_mocap_welds()
Example #29
    def __init__(self,
                 random_init=True,
                 obs_type='with_goal',
                 goal_low=(-0.04, 0.8, -0.08),
                 goal_high=(0.04, 0.88, -0.08),
                 rotMode='fixed',
                 **kwargs):
        self.quick_init(locals())

        hand_low = (-0.5, 0.40, -0.15)
        hand_high = (0.5, 1, 0.5)
        obj_low = (-0.1, 0.6, 0.02)
        obj_high = (0.1, 0.7, 0.02)

        SawyerXYZEnv.__init__(self,
                              frame_skip=5,
                              action_scale=1. / 100,
                              hand_low=hand_low,
                              hand_high=hand_high,
                              model_name=self.model_name,
                              **kwargs)
        self.init_config = {
            'obj_init_pos': np.array([0, 0.6, 0.02]),
            'obj_init_angle': 0.3,
            'hand_init_pos': np.array([0, 0.6, 0.2], dtype=np.float32),
        }
        self.goal = np.array([0., 0.84, -0.08], dtype=np.float32)
        self.obj_init_pos = self.init_config['obj_init_pos']
        self.obj_init_angle = self.init_config['obj_init_angle']
        self.hand_init_pos = self.init_config['hand_init_pos']

        assert obs_type in OBS_TYPE
        self.obs_type = obs_type

        if goal_low is None:
            goal_low = self.hand_low

        if goal_high is None:
            goal_high = self.hand_high

        self.random_init = random_init
        self.max_path_length = 200
        self.rotMode = rotMode
        if rotMode == 'fixed':
            self.action_space = Box(
                np.array([-1, -1, -1, -1]),
                np.array([1, 1, 1, 1]),
            )
        elif rotMode == 'rotz':
            self.action_rot_scale = 1. / 50
            self.action_space = Box(
                np.array([-1, -1, -1, -np.pi, -1]),
                np.array([1, 1, 1, np.pi, 1]),
            )
        elif rotMode == 'quat':
            self.action_space = Box(
                np.array([-1, -1, -1, 0, -1, -1, -1, -1]),
                np.array([1, 1, 1, 2 * np.pi, 1, 1, 1, 1]),
            )
        else:
            self.action_space = Box(
                np.array([-1, -1, -1, -np.pi / 2, -np.pi / 2, 0, -1]),
                np.array([1, 1, 1, np.pi / 2, np.pi / 2, np.pi * 2, 1]),
            )
        self.obj_and_goal_space = Box(
            np.hstack((obj_low, goal_low)),
            np.hstack((obj_high, goal_high)),
        )
        self.goal_space = Box(np.array(goal_low), np.array(goal_high))
        if self.obs_type == 'plain':
            self.observation_space = Box(
                np.hstack((
                    self.hand_low,
                    obj_low,
                )),
                np.hstack((
                    self.hand_high,
                    obj_high,
                )),
            )
        elif self.obs_type == 'with_goal':
            self.observation_space = Box(
                np.hstack((self.hand_low, obj_low, goal_low)),
                np.hstack((self.hand_high, obj_high, goal_high)),
            )
        else:
            raise NotImplementedError
        self.reset()
Example #30
parser.add_argument("--num-cpus", type=int, default=0)

if __name__ == "__main__":
    args = parser.parse_args()
    ray.init(num_cpus=args.num_cpus or None)
    register_env("NestedSpaceRepeatAfterMeEnv",
                 lambda c: NestedSpaceRepeatAfterMeEnv(c))

    config = {
        "env": "NestedSpaceRepeatAfterMeEnv",
        "env_config": {
            "space":
            Dict({
                "a":
                Tuple([Dict({
                    "d": Box(-10.0, 10.0, ()),
                    "e": Discrete(2)
                })]),
                "b":
                Box(-10.0, 10.0, (2, )),
                "c":
                Discrete(4)
            }),
        },
        "entropy_coeff": 0.00005,  # We don't want high entropy in this Env.
        "gamma": 0.0,  # No history in Env (bandit problem).
        "lr": 0.0005,
        "num_envs_per_worker": 20,
        "num_sgd_iter": 4,
        "num_workers": 0,
        "vf_loss_coeff": 0.01,