def __init__(self, name, horizon, gamma): """ Constructor. Args: name (str): gym id of the environment; horizon (int): the horizon; gamma (float): the discount factor. """ # MDP creation if '- ' + name in pybullet_envs.getList(): import pybullet pybullet.connect(pybullet.DIRECT) self.env = gym.make(name) self.env._max_episode_steps = np.inf # Hack to ignore gym time limit. # MDP properties assert not isinstance(self.env.observation_space, gym_spaces.MultiDiscrete) assert not isinstance(self.env.action_space, gym_spaces.MultiDiscrete) action_space = self._convert_gym_space(self.env.action_space) observation_space = self._convert_gym_space(self.env.observation_space) mdp_info = MDPInfo(observation_space, action_space, gamma, horizon) if isinstance(action_space, Discrete): self._convert_action = lambda a: a[0] else: self._convert_action = lambda a: a super().__init__(mdp_info)
def __init__(self, horizon=100, gamma=.95): """ Constructor. Args: horizon (int, 100): horizon of the problem; gamma (float, .95): discount factor. """ # MDP parameters self.max_pos = 1. self.max_velocity = 3. high = np.array([self.max_pos, self.max_velocity]) self._g = 9.81 self._m = 1. self._dt = .1 self._discrete_actions = [-4., 4.] # MDP properties observation_space = spaces.Box(low=-high, high=high) action_space = spaces.Discrete(2) mdp_info = MDPInfo(observation_space, action_space, gamma, horizon) # Visualization self._viewer = Viewer(1, 1) super().__init__(mdp_info)
def __init__(self, **kwargs): # Define environment properties high_x = np.array([5.0, 5.0, np.pi]) low_x = -high_x high_u = np.array([1.0, 3.0]) low_u = -high_u observation_space = spaces.Box(low=low_x, high=high_x) action_space = spaces.Box(low=low_u, high=high_u) gamma = 0.9 horizon = 400 mdp_info = MDPInfo(observation_space, action_space, gamma, horizon) hz = 10.0 super(TurtlebotGazebo, self).__init__('turtlebot_gazebo', mdp_info, hz, **kwargs) # subscribe to /cmd_vel topic to publish the setpoint self._pub = rospy.Publisher('/cmd_vel', Twist, queue_size=1) # subscribe to /gazebo/model_states to get the position of the turtlebot model_state_service_name = '/gazebo/get_model_state' rospy.wait_for_service(model_state_service_name) self._model_state_service = rospy.ServiceProxy(model_state_service_name, GetModelState)
def __init__(self, gamma=0.995, horizon=1500, debug_gui=False, kine_type='analytical'): self.simulation = AirHockeyBase(gamma, horizon, debug_gui) self.kin_type = kine_type # [X, Y, Pitch, Yaw, Psi] low = np.array([0.6, -0.5]) #, np.deg2rad(-180), np.deg2rad(-10), 0]) high = np.array([1.1, 0.5]) #, np.deg2rad(180), np.deg2rad(75), 0]) action_space = Box(low, high) observation_space = self.simulation._mdp_info.observation_space mdp_info = MDPInfo(observation_space, action_space, gamma, horizon) super().__init__(mdp_info) self.table_surface_height = 0.162 self.iiwa_global_config = [1, -1, 1] self.fixed_parameters = [0., np.deg2rad(-35), 1.7] # self.fixed_parameters = [0., np.deg2rad(-35), 0] self.get_initial_state() if self.kin_type == 'analytical': self.kinematics = Kinematics() elif self.kin_type == 'pykdl' and KDLInterface is not None: self.kinematics = KDLInterface(self.simulation.robot_path, 7, 'front_base', 'F_link_ee', np.array(IIWA_JOINT_MIN_LIMITS), np.array(IIWA_JOINT_MAX_LIMITS), np.array([0., 0., -9.81])) else: raise NotImplementedError
def __init__(self, start=None, goal=None, goal_threshold=.1, noise_step=.025, noise_reward=0, reward_goal=0., thrust=.05, puddle_center=None, puddle_width=None, gamma=.99, horizon=5000): """ Constructor. Args: start (np.array, None): starting position of the agent; goal (np.array, None): goal position; goal_threshold (float, .1): distance threshold of the agent from the goal to consider it reached; noise_step (float, .025): noise in actions; noise_reward (float, 0): standard deviation of gaussian noise in reward; reward_goal (float, 0): reward obtained reaching goal state; thrust (float, .05): distance walked during each action; puddle_center (np.array, None): center of the puddle; puddle_width (np.array, None): width of the puddle; gamma (float, .99): discount factor. horizon (int, 5000): horizon of the problem; """ # MDP parameters self._start = np.array([.2, .4]) if start is None else start self._goal = np.array([1., 1.]) if goal is None else goal self._goal_threshold = goal_threshold self._noise_step = noise_step self._noise_reward = noise_reward self._reward_goal = reward_goal self._thrust = thrust puddle_center = [[.3, .6], [.4, .5], [.8, .9] ] if puddle_center is None else puddle_center self._puddle_center = [np.array(center) for center in puddle_center] puddle_width = [[.1, .03], [.03, .1], [.03, .1] ] if puddle_width is None else puddle_width self._puddle_width = [np.array(width) for width in puddle_width] self._actions = [np.zeros(2) for _ in range(5)] for i in range(4): self._actions[i][i // 2] = thrust * (i % 2 * 2 - 1) # MDP properties action_space = Discrete(5) observation_space = Box(0., 1., shape=(2, )) mdp_info = MDPInfo(observation_space, action_space, gamma, horizon) # Visualization self._pixels = None self._viewer = Viewer(1.0, 1.0) super().__init__(mdp_info)
def __init__(self, height=3, width=3, goal=(0, 2), start=(2, 0)): # MDP properties observation_space = spaces.Discrete(height * width) action_space = spaces.Discrete(4) horizon = np.inf gamma = .95 mdp_info = MDPInfo(observation_space, action_space, gamma, horizon) super().__init__(mdp_info, height, width, start, goal)
def __init__(self, name, horizon, gamma, wrappers=None, wrappers_args=None, **env_args): """ Constructor. Args: name (str): gym id of the environment; horizon (int): the horizon; gamma (float): the discount factor; wrappers (list): list of wrappers to apply over the environment. It is possible to pass arguments to the wrappers by providing a tuple with two elements: the gym wrapper class and a dictionary containing the parameters needed by the wrapper constructor; wrappers_args (list): list of list of arguments for each wrapper; ** env_args: other gym environment parameters. """ # MDP creation self._not_pybullet = True self._first = True if pybullet_found and '- ' + name in pybullet_envs.getList(): import pybullet pybullet.connect(pybullet.DIRECT) self._not_pybullet = False self.env = gym.make(name, **env_args) if wrappers is not None: if wrappers_args is None: wrappers_args = [dict()] * len(wrappers) for wrapper, args in zip(wrappers, wrappers_args): if isinstance(wrapper, tuple): self.env = wrapper[0](self.env, *args, **wrapper[1]) else: self.env = wrapper(self.env, *args, **env_args) self.env._max_episode_steps = np.inf # Hack to ignore gym time limit. # MDP properties assert not isinstance(self.env.observation_space, gym_spaces.MultiDiscrete) assert not isinstance(self.env.action_space, gym_spaces.MultiDiscrete) action_space = self._convert_gym_space(self.env.action_space) observation_space = self._convert_gym_space(self.env.observation_space) mdp_info = MDPInfo(observation_space, action_space, gamma, horizon) if isinstance(action_space, Discrete): self._convert_action = lambda a: a[0] else: self._convert_action = lambda a: a super().__init__(mdp_info)
def __init__(self, name, width=84, height=84, ends_at_life=False, max_pooling=True, history_length=4, max_no_op_actions=30): """ Constructor. Args: name (str): id name of the Atari game in Gym; width (int, 84): width of the screen; height (int, 84): height of the screen; ends_at_life (bool, False): whether the episode ends when a life is lost or not; max_pooling (bool, True): whether to do max-pooling or average-pooling of the last two frames when using NoFrameskip; history_length (int, 4): number of frames to form a state; max_no_op_actions (int, 30): maximum number of no-op action to execute at the beginning of an episode. """ # MPD creation if 'NoFrameskip' in name: self.env = MaxAndSkip(gym.make(name), history_length, max_pooling) else: self.env = gym.make(name) # MDP parameters self._img_size = (width, height) self._episode_ends_at_life = ends_at_life self._max_lives = self.env.unwrapped.ale.lives() self._lives = self._max_lives self._force_fire = None self._real_reset = True self._max_no_op_actions = max_no_op_actions self._history_length = history_length self._current_no_op = None assert self.env.unwrapped.get_action_meanings()[0] == 'NOOP' # MDP properties action_space = Discrete(self.env.action_space.n) observation_space = Box(low=0., high=255., shape=(history_length, self._img_size[1], self._img_size[0])) horizon = np.inf # the gym time limit is used. gamma = .99 mdp_info = MDPInfo(observation_space, action_space, gamma, horizon) super().__init__(mdp_info)
def __init__(self, A, B, Q, R, max_pos=np.inf, max_action=np.inf, random_init=False, episodic=False, gamma=0.9, horizon=50, initial_state=None): """ Constructor. Args: A (np.ndarray): the state dynamics matrix; B (np.ndarray): the action dynamics matrix; Q (np.ndarray): reward weight matrix for state; R (np.ndarray): reward weight matrix for action; max_pos (float, np.inf): maximum value of the state; max_action (float, np.inf): maximum value of the action; random_init (bool, False): start from a random state; episodic (bool, False): end the episode when the state goes over the threshold; gamma (float, 0.9): discount factor; horizon (int, 50): horizon of the mdp. """ self.A = A self.B = B self.Q = Q self.R = R self._max_pos = max_pos self._max_action = max_action self._episodic = episodic self.random_init = random_init self._initial_state = initial_state # MDP properties high_x = self._max_pos * np.ones(A.shape[0]) low_x = -high_x high_u = self._max_action * np.ones(B.shape[1]) low_u = -high_u observation_space = spaces.Box(low=low_x, high=high_x) action_space = spaces.Box(low=low_u, high=high_u) mdp_info = MDPInfo(observation_space, action_space, gamma, horizon) super().__init__(mdp_info)
def __init__(self, m=2., M=8., l=.5, g=9.8, mu=1e-2, max_u=50., noise_u=10., horizon=3000, gamma=.95): """ Constructor. Args: m (float, 2.0): mass of the pendulum; M (float, 8.0): mass of the cart; l (float, .5): length of the pendulum; g (float, 9.8): gravity acceleration constant; max_u (float, 50.): maximum allowed input torque; noise_u (float, 10.): maximum noise on the action; horizon (int, 3000): horizon of the problem; gamma (float, .95): discount factor. """ # MDP parameters self._m = m self._M = M self._l = l self._g = g self._alpha = 1 / (self._m + self._M) self._mu = mu self._dt = .1 self._max_u = max_u self._noise_u = noise_u high = np.array([np.inf, np.inf]) # MDP properties observation_space = spaces.Box(low=-high, high=high) action_space = spaces.Discrete(3) mdp_info = MDPInfo(observation_space, action_space, gamma, horizon) # Visualization self._viewer = Viewer(2.5 * l, 2.5 * l) self._last_u = None self._state = None super().__init__(mdp_info)
def __init__(self, items, gamma, horizon, trans_model_abs_path, item_dist=None): # MDP parameters # 1) discrete actions: list of item names or representing integers # 2) actions on n-dimensional space: list of a pair of min and max values per action self.items = items self.action_dim = len(self.items) if item_dist is None: if len(self.items.shape) == 1: if 'none' in self.items: self.item_dist = np.zeros(self.action_dim) self.item_dist[1:] = 1/(self.action_dim-1) else: self.item_dist = 1/(self.action_dim) else: self.item_dist = None else: self.item_dist = item_dist self.gamma = gamma ## discount factor self.horizon = horizon ## time limit to long self.trans_model = ModelMaker(FlexibleTorchModel, model_dir_path=trans_model_abs_path) self.trans_model_params = self.trans_model.model.state_dict() tmp = list(self.trans_model_params.keys()) key = list(filter(lambda x: '0.weight' in x, tmp))[0] self.state_dim = self.trans_model_params[key].shape[1] - self.action_dim if 'none' in self.items: self.state_dim += 1 MM_VAL = 100 self.min_point = np.ones(self.state_dim) * -MM_VAL self.max_point = np.ones(self.state_dim) * MM_VAL if len(self.items.shape) == 1: self._discrete_actions = list(range(self.action_dim)) else: self._discrete_actions = None # MDP properties observation_space = spaces.Box(low=self.min_point, high=self.max_point) if len(self.items.shape) == 1: action_space = spaces.Discrete(self.action_dim) else: action_space = spaces.Box(low=self.items[0][0], high=self.items[0][1]) mdp_info = MDPInfo(observation_space, action_space, gamma, horizon) super().__init__(mdp_info)
def __init__(self, game, horizon, gamma, obs_shape=None, **env_args): self._close_at_stop = True self.env = retro.make(game=game, **env_args) self.env._max_episode_steps = np.inf observation_space = None if (obs_shape == None): observation_space = self._convert_gym_space( self.env.observation_space) else: observation_space = Discrete(obs_shape) action_space = self._convert_gym_space(self.env.action_space) mdp_info = MDPInfo(observation_space, action_space, gamma, horizon) super().__init__(mdp_info)
def __init__(self, small=True, n_steps_action=3): """ Constructor. Args: small (bool, True): whether to use a small state space or not. n_steps_action (int, 3): number of integration intervals for each step of the mdp. """ # MDP parameters self.field_size = 150 if small else 1000 low = np.array([0, 0, -np.pi, -np.pi / 12.]) high = np.array([self.field_size, self.field_size, np.pi, np.pi / 12.]) self.omega_max = np.array([np.pi / 12.]) self._v = 3. self._T = 5. self._dt = .2 self._gate_s = np.empty(2) self._gate_e = np.empty(2) self._gate_s[0] = 100 if small else 350 self._gate_s[1] = 120 if small else 400 self._gate_e[0] = 120 if small else 450 self._gate_e[1] = 100 if small else 400 self._out_reward = -100 self._success_reward = 0 self._small = small self._state = None self.n_steps_action = n_steps_action # MDP properties observation_space = spaces.Box(low=low, high=high) action_space = spaces.Box(low=-self.omega_max, high=self.omega_max) horizon = 5000 gamma = .99 mdp_info = MDPInfo(observation_space, action_space, gamma, horizon) # Visualization self._viewer = Viewer(self.field_size, self.field_size, background=(66, 131, 237)) super().__init__(mdp_info)
def __init__(self, domain_name, task_name, horizon, gamma, task_kwargs=None, dt=.01, width_screen=480, height_screen=480, camera_id=0): """ Constructor. Args: domain_name (str): name of the environment; task_name (str): name of the task of the environment; horizon (int): the horizon; gamma (float): the discount factor; task_kwargs (dict, None): parameters of the task; dt (float, .01): duration of a control step; width_screen (int, 480): width of the screen; height_screen (int, 480): height of the screen; camera_id (int, 0): position of camera to render the environment; """ # MDP creation if task_kwargs is None: task_kwargs = dict() task_kwargs[ 'time_limit'] = np.inf # Hack to ignore dm_control time limit. self.env = suite.load(domain_name, task_name, task_kwargs=task_kwargs) # MDP properties action_space = self._convert_action_space(self.env.action_spec()) observation_space = self._convert_observation_space( self.env.observation_spec()) mdp_info = MDPInfo(observation_space, action_space, gamma, horizon) self._viewer = ImageViewer((width_screen, height_screen), dt) self._camera_id = camera_id super().__init__(mdp_info)
def __init__(self, name, horizon, gamma, **kwargs): """ Constructor. Args: name (str): gym id of the environment; horizon (int): the horizon; gamma (float): the discount factor. """ # MDP creation self._close_at_stop = True if '- ' + name in pybullet_envs.getList(): import pybullet pybullet.connect(pybullet.DIRECT) self._close_at_stop = False self.env = gym.make(name) # set to 100Hz for pendulum if name == 'Pendulum-ID-v1' or name == 'Cartpole-ID-v1': self.env._max_episode_steps = 1000 self.env.unwrapped._dt = 0.01 self.env.unwrapped._sigma = 1e-4 self.env._max_episode_steps = np.inf # Hack to ignore gym time limit. # MDP properties assert not isinstance(self.env.observation_space, gym_spaces.MultiDiscrete) assert not isinstance(self.env.action_space, gym_spaces.MultiDiscrete) action_space = self._convert_gym_space(self.env.action_space) observation_space = self._convert_gym_space(self.env.observation_space) mdp_info = MDPInfo(observation_space, action_space, gamma, horizon) if isinstance(action_space, Discrete): self._convert_action = lambda a: a[0] else: self._convert_action = lambda a: a super().__init__(mdp_info)
def __init__(self, m, g, a, horizon=100, gamma=.95): """ Constructor. """ # MDP parameters self.max_pos = 1. self.max_velocity = 3. high = np.array([self.max_pos, self.max_velocity]) self._g = g self._m = m self._dt = .1 self._discrete_actions = [-a, a] # MDP properties observation_space = spaces.Box(low=-high, high=high) action_space = spaces.Discrete(2) mdp_info = MDPInfo(observation_space, action_space, gamma, horizon) super().__init__(mdp_info)
def __init__(self, random_start=False, m=1., l=1., g=9.8, mu=1e-2, max_u=5., horizon=5000, gamma=.99): """ Constructor. Args: random_start (bool, False): whether to start from a random position or from the horizontal one; m (float, 1.0): mass of the pendulum; l (float, 1.0): length of the pendulum; g (float, 9.8): gravity acceleration constant; mu (float, 1e-2): friction constant of the pendulum; max_u (float, 5.0): maximum allowed input torque; horizon (int, 5000): horizon of the problem; gamma (int, .99): discount factor. """ # MDP parameters self._m = m self._l = l self._g = g self._mu = mu self._random = random_start self._dt = .01 self._max_u = max_u self._max_omega = 5 / 2 * np.pi high = np.array([np.pi, self._max_omega]) # MDP properties observation_space = spaces.Box(low=-high, high=high) action_space = spaces.Box(low=np.array([-max_u]), high=np.array([max_u])) mdp_info = MDPInfo(observation_space, action_space, gamma, horizon) # Visualization self._viewer = Viewer(2.5 * l, 2.5 * l) self._last_u = None super().__init__(mdp_info)
def __init__(self, random_start=False): """ Constructor. Args: random_start (bool, False): whether to start from a random position or from the horizontal one. """ # MDP parameters gamma = 0.97 self._Mr = 0.3 * 2 self._Mp = 2.55 self._Ip = 2.6e-2 self._Ir = 4.54e-4 * 2 self._l = 13.8e-2 self._r = 5.5e-2 self._dt = 1e-2 self._g = 9.81 self._max_u = 5 self._random = random_start high = np.array([-np.pi / 2, 15, 75]) # MDP properties observation_space = spaces.Box(low=-high, high=high) action_space = spaces.Box(low=np.array([-self._max_u]), high=np.array([self._max_u])) horizon = 300 mdp_info = MDPInfo(observation_space, action_space, gamma, horizon) # Visualization self._viewer = Viewer(5 * self._l, 5 * self._l) self._last_x = 0 super().__init__(mdp_info)
def __init__(self, **kwargs): # Define environment properties high_x = np.array([11.088889122009277, 11.088889122009277, np.pi]) low_x = np.array([0, 0, -np.pi]) high_u = np.array([2.0, 2.0]) low_u = np.array([0., -2.0]) observation_space = spaces.Box(low=low_x, high=high_x) action_space = spaces.Box(low=low_u, high=high_u) gamma = 0.9 horizon = 100 mdp_info = MDPInfo(observation_space, action_space, gamma, horizon) hz = 10.0 self._target = np.array([2.0, 2.0, 0.0]) self._current_pose = np.zeros(3) super().__init__('turtlesim_env', mdp_info, hz, **kwargs) # subscribe to /turtle1/cmd_vel topic to publish the setpoint self._cmd = rospy.Publisher('/turtle1/cmd_vel', Twist, queue_size=1) # subscribe to /turtle1/pose to get the turtle pose self._pose = rospy.Subscriber('/turtle1/pose', Pose, self._pose_callback) # subscribe to the teleporting service to reset environment self._reset_service_name = '/turtle1/teleport_absolute' rospy.wait_for_service(self._reset_service_name) self._reset_service = rospy.ServiceProxy(self._reset_service_name, TeleportAbsolute)
def __init__(self, file_name, actuation_spec, observation_spec, gamma, horizon, n_substeps=1, n_intermediate_steps=1, additional_data_spec=None, collision_groups=None): """ Constructor. Args: file_name (string): The path to the XML file with which the environment should be created; actuation_spec (list): A list specifying the names of the joints which should be controllable by the agent. Can be left empty when all actuators should be used; observation_spec (list): A list containing the names of data that should be made available to the agent as an observation and their type (ObservationType). An entry in the list is given by: (name, type); gamma (float): The discounting factor of the environment; horizon (int): The maximum horizon for the environment; n_substeps (int): The number of substeps to use by the MuJoCo simulator. An action given by the agent will be applied for n_substeps before the agent receives the next observation and can act accordingly; n_intermediate_steps (int): The number of steps between every action taken by the agent. Similar to n_substeps but allows the user to modify, control and access intermediate states. additional_data_spec (list): A list containing the data fields of interest, which should be read from or written to during simulation. The entries are given as the following tuples: (key, name, type) key is a string for later referencing in the "read_data" and "write_data" methods. The name is the name of the object in the XML specification and the type is the ObservationType; collision_groups (list): A list containing groups of geoms for which collisions should be checked during simulation via ``check_collision``. The entries are given as: ``(key, geom_names)``, where key is a string for later referencing in the "check_collision" method, and geom_names is a list of geom names in the XML specification. """ # Create the simulation self.sim = mujoco_py.MjSim(mujoco_py.load_model_from_path(file_name), nsubsteps=n_substeps) self.n_intermediate_steps = n_intermediate_steps self.viewer = None self._state = None # Read the actuation spec and build the mapping between actions and ids # as well as their limits if len(actuation_spec) == 0: self.action_indices = [i for i in range(0, len(self.sim.model._actuator_name2id))] else: self.action_indices = [] for name in actuation_spec: self.action_indices.append(self.sim.model._actuator_name2id[name]) low = [] high = [] for index in self.action_indices: if self.sim.model.actuator_ctrllimited[index]: low.append(self.sim.model.actuator_ctrlrange[index][0]) high.append(self.sim.model.actuator_ctrlrange[index][1]) else: low.append(-np.inf) high.append(np.inf) action_space = Box(np.array(low), np.array(high)) # Read the observation spec to build a mapping at every step. It is # ensured that the values appear in the order they are specified. if len(observation_spec) == 0: raise AttributeError("No Environment observations were specified. " "Add at least one observation to the observation_spec.") else: self.observation_map = observation_spec # We can only specify limits for the joint positions, all other # information can be potentially unbounded low = [] high = [] for name, ot in self.observation_map: obs_count = len(self._observation_map(name, ot)) if obs_count == 1 and ot == ObservationType.JOINT_POS: joint_range = self.sim.model.jnt_range[self.sim.model._joint_name2id[name]] if not(joint_range[0] == joint_range[1] == 0.0): low.append(joint_range[0]) high.append(joint_range[1]) else: low.extend(-np.inf) high.extend(np.inf) else: low.extend([-np.inf] * obs_count) high.extend([np.inf] * obs_count) observation_space = Box(np.array(low), np.array(high)) # Pre-process the additional data to allow easier writing and reading # to and from arrays in MuJoCo self.additional_data = {} for key, name, ot in additional_data_spec: self.additional_data[key] = (name, ot) # Pre-process the collision groups for "fast" detection of contacts self.collision_groups = {} if self.collision_groups is not None: for name, geom_names in collision_groups: self.collision_groups[name] = {self.sim.model._geom_name2id[geom_name] for geom_name in geom_names} # Finally, we create the MDP information and call the constructor of # the parent class mdp_info = MDPInfo(observation_space, action_space, gamma, horizon) super().__init__(mdp_info)
def __init__(self, database, area, observation, delta_degree=0.1, speed=5., gamma=1, start_hour=8, end_hour=22, days_loaded=1, train_days=None, add_time=False, allow_wait=False, project_dir=None): # self.area = area self.next_event = None self.time = None self.position = None self.done = False self.speed = speed self.departures = None self.spot_states = None self.violation_times = None self.potential_violation_times = None self.allowed_durations = None self.delayed_arrival = None self.observation = observation self.render_images = None self.position_plots = None self.start_hour = start_hour self.end_hour = end_hour self.add_time = add_time self.days_loaded = days_loaded self.days_consumed = np.inf self.day = None self.end_day = None self.train_days = train_days if train_days is not None else list(range(1, 356)) conn = sqlite3.connect('file:%s?mode=ro' % database, uri=True) cursor = conn.cursor() if area is None: self.areas = [] elif isinstance(area, list) or isinstance(area, tuple): self.areas = area else: self.areas = [area] if len(self.areas) == 0: where_areas = " 1 = 1 " # if no area specified, use all! else: where_areas = " or ".join(map(lambda a: " Area=\"" + a + "\" ", self.areas)) cursor.execute("SELECT min(lat) as south, max(lat) as north, min(lon) as west, max(lon) as east " "from devices where " + where_areas) row = cursor.fetchone() sqlx = 'select DeviceID, ArrivalTime, DepartureTime, duration*60 ' \ 'from events join durations on durations.sign=events.sign ' \ 'where ' + where_areas + ' and "Vehicle Present"="True" order by ArrivalTime' self.all_events = pd.read_sql(sqlx, conn, coerce_float=True, params=None) self.events = None self.event_idx = None dx = max(0.01, row[1] - row[0]) * delta_degree dy = max(0.01, row[3] - row[2]) * delta_degree self.north = row[1] + dx self.south = row[0] - dx self.west = row[2] - dy self.east = row[3] + dy if project_dir is None: project_dir = os.getcwd() data_dir = os.path.join(project_dir, "data") area_hash = hashlib.md5(json.dumps(sorted(area)).encode("utf8")).hexdigest() graph_file = 'graph_' + area_hash + '.gml' try: self.graph = ox.load_graphml(graph_file, folder=data_dir) except FileNotFoundError: self.graph = ox.graph_from_bbox( south=self.south, north=self.north, west=self.west, east=self.east, network_type='walk') # ox.plot_graph(self.graph) ox.save_graphml(self.graph, filename=graph_file, folder=data_dir) print("graph nodes: ", len(self.graph.nodes), ", edges: ", len(self.graph.edges)) self.actions = tuple(self.graph.edges) if allow_wait: self.actions = self.actions + ('wait', ) self.edge_indices = {edge: i for i, edge in enumerate(self.actions)} self.edge_lengths = nx.get_edge_attributes(self.graph, 'length') self.edge_resources = dict() self.resource_edges = dict() self.devices = dict() self.device_ordering = [] self.spot_plots = dict() devices_file = os.path.join(data_dir, "devices_" + area_hash + ".pckl") try: with open(devices_file, "rb") as f: self.devices, self.device_ordering, self.edge_resources, self.resource_edges = pickle.load(f) except: print("cannot load devices file", devices_file) fig, ax = ox.plot_graph(self.graph, show=False, close=False) res = cursor.execute("SELECT lat, lon, DeviceID " + "from devices " + "where " + where_areas) for lat, lon, device_id, *vio_times in tqdm(res.fetchall()): # g = ox.truncate_graph_bbox(self.graph, lat+dy, lat-dy, lon+dx, lon-dx, True, True) g = ox.truncate_graph_dist(self.graph, ox.get_nearest_node(self.graph, (lat, lon)), 500, retain_all=True) nearest = ox.get_nearest_edge(g, (lat, lon)) edge = (nearest[1], nearest[2]) self.device_ordering.append(device_id) self.devices[device_id] = (lat, lon, edge) if edge in self.edge_resources: edge_resources = self.edge_resources[edge] else: edge_resources = [] self.edge_resources[edge] = edge_resources edge_resources.append(device_id) self.resource_edges[device_id] = edge ax.plot(*nearest[0].xy, color='blue') ax.scatter(lon, lat, s=2, c="red") with open(devices_file, "wb") as f: pickle.dump((self.devices, self.device_ordering, self.edge_resources, self.resource_edges), f) plt.show() print(len(self.devices), "parking devices") mdp_info = MDPInfo(Discrete(2), Discrete(len(self.actions)), gamma, np.inf) super().__init__(mdp_info) cursor.close() conn.close()
def __init__(self, files, actuation_spec, observation_spec, gamma, horizon, timestep=1 / 240, n_intermediate_steps=1, debug_gui=False, **viewer_params): """ Constructor. Args: files (list): Paths to the URDF files to load; actuation_spec (list): A list of tuples specifying the names of the joints which should be controllable by the agent and tehir control mode. Can be left empty when all actuators should be used in position control; observation_spec (list): A list containing the names of data that should be made available to the agent as an observation and their type (ObservationType). An entry in the list is given by: (name, type); gamma (float): The discounting factor of the environment; horizon (int): The maximum horizon for the environment; timestep (float, 0.00416666666): The timestep used by the PyBullet simulator; n_intermediate_steps (int): The number of steps between every action taken by the agent. Allows the user to modify, control and access intermediate states; **viewer_params: other parameters to be passed to the viewer. See PyBulletViewer documentation for the available options. """ # Store simulation parameters self._timestep = timestep self._n_intermediate_steps = n_intermediate_steps # Create the simulation and viewer if debug_gui: self._client = BulletClient(connection_mode=pybullet.GUI) else: self._client = BulletClient(connection_mode=pybullet.DIRECT) self._client.setTimeStep(self._timestep) self._client.setGravity(0, 0, -9.81) self._client.setAdditionalSearchPath(pybullet_data.getDataPath()) self._viewer = PyBulletViewer( self._client, self._timestep * self._n_intermediate_steps, **viewer_params) self._state = None # Load model and create access maps self._model_map = dict() for file_name, kwargs in files.items(): model_id = self._client.loadURDF(file_name, **kwargs) model_name = self._client.getBodyInfo(model_id)[1].decode('UTF-8') self._model_map[model_name] = model_id self._model_map.update(self._custom_load_models()) self._joint_map = dict() self._link_map = dict() for model_id in self._model_map.values(): for joint_id in range(self._client.getNumJoints(model_id)): joint_data = self._client.getJointInfo(model_id, joint_id) if joint_data[2] != pybullet.JOINT_FIXED: joint_name = joint_data[1].decode('UTF-8') self._joint_map[joint_name] = (model_id, joint_id) link_name = joint_data[12].decode('UTF-8') self._link_map[link_name] = (model_id, joint_id) # Read the actuation spec and build the mapping between actions and ids # as well as their limits assert (len(actuation_spec) > 0) self._action_data = list() for name, mode in actuation_spec: if name in self._joint_map: data = self._joint_map[name] + (mode, ) self._action_data.append(data) low, high = self._compute_action_limits() action_space = Box(np.array(low), np.array(high)) # Read the observation spec to build a mapping at every step. It is # ensured that the values appear in the order they are specified. if len(observation_spec) == 0: raise AttributeError( "No Environment observations were specified. " "Add at least one observation to the observation_spec.") self._observation_map = observation_spec self._observation_indices_map = dict() # We can only specify limits for the joint positions, all other # information can be potentially unbounded low, high = self._compute_observation_limits() observation_space = Box(low, high) # Finally, we create the MDP information and call the constructor of # the parent class mdp_info = MDPInfo(observation_space, action_space, gamma, horizon) super().__init__(mdp_info) # Save initial state of the MDP self._initial_state = self._client.saveState()
def __init__(self, physicsClientId=1, render=True): print('=====init======') self.showSimulation = False self.ll, self.ul, self.jr, self.rs = [], [], [], [] self._states = [0, 1, 2, 3] self.current_t = 0 self.state = 1 self._state = np.array([0]) self.waittimer = 10 self.boxId = None self.robotId = None self.tableId = None self.planeId = None self.rob_start_pos = [-0.5, 0, 1.] self.rob_start_orn = p.getQuaternionFromEuler([m.pi, 0, 0]) self.obj_start_pos = [-0.5, 0, 0.73175] self.obj_top = [-0.5, 0, 0.8384935] self.obj_start_orn = p.getQuaternionFromEuler([0, 0, m.pi / 2]) self.end_eff_idx = 6 self.robot_left_finger_idx = 7 self.robot_right_finger_idx = 8 self._use_IK = 0 self._control_eu_or_quat = 0 self.joint_action_space = 7 self._joint_name_to_ids = {} self._renders = render self.urdfroot = currentdir self._physics_client_id = physicsClientId self._p = p self.first_ep = True self.grasp_attemp_pos = [] #print("urdrfoot:",self.urdfroot) #self.seed() # TODO gamma = 1. horizon = 300 high = np.array([-0.25, 0.1, 0.91]) low = np.array([-0.75, -0.1, 0.91]) self._max_u = 0.37 self._min_u = 0.30 #observation_space = np.array([0, 0, 0]) #action_space = self.sample_action() observation_space = spaces.Box(low=low, high=high) action_space = spaces.Box(low=np.array([-self._max_u]), high=np.array([self._min_u])) mdp_info = MDPInfo(observation_space, action_space, gamma, horizon) super().__init__(mdp_info) if self._renders: self._physics_client_id = p.connect(p.SHARED_MEMORY) if self.showSimulation: if self._physics_client_id < 0: self._physics_client_id = p.connect(p.GUI) else: p.connect(p.DIRECT) else: p.connect(p.DIRECT) # TODO # self.planeId = p.loadURDF(pybullet_data.getDataPath() + "/plane.urdf") # # # # # Place table # self.tableId = p.loadURDF(pybullet_data.getDataPath() + '/table/table.urdf') # # # # # Place robot # #self.robotId = p.loadURDF(self.urdfroot + "/robot/panda_visual_arm_grasper.urdf", # #self.rob_start_pos, self.rob_start_orn) # self.robotId = p.loadURDF(currentdir + "/environment-master/panda_robot_grasper/panda_visual_arm_grasper.udrf", # self.rob_start_pos, self.rob_start_orn) # # # # # Place ycb object # self.boxId = p.loadURDF(self.urdfroot + '/003_cracker_box/model_normalized.urdf', self.obj_start_pos, self.obj_start_orn) #p.resetDebugVisualizerCamera(1.5, 245, -56, [-0.5, 0, 0.61]) # Add user debug parameters, comment back for debug slider use """self.control_prismatic_joint1 = p.addUserDebugParameter('control_prismatic_joint1', -.5, .5, 0) # 0