ground_body = world.CreateStaticBody(
    position=(0, 0),
    shapes=polygonShape(box=(50, 1)),
)

# Create a couple of dynamic bodies
bodyc = world.CreateDynamicBody(position=(20, 45))
circle = bodyc.CreateCircleFixture(radius=0.5, density=1, friction=0.3)

bodyb = world.CreateDynamicBody(position=(30, 45), angle=15)
box = bodyb.CreatePolygonFixture(box=(2, 1), density=1, friction=0.3)

# Weld the circle and the box together so they move as one rigid assembly
world.CreateWeldJoint(bodyA=bodyc, bodyB=bodyb, anchor=bodyb.worldCenter)

drawer = OpencvDrawFuncs(w=640, h=480, ppm=20)
drawer.install()

while True:
    key = 0xFF & cv2.waitKey(int(TIME_STEP * 1000))  # milliseconds
    if key == 27:  # Esc quits the viewer
        break

    drawer.clear_screen()
    drawer.draw_world(world)

    # Make Box2D simulate the physics of our world for one step.
    world.Step(TIME_STEP, 10, 10)

    # Display the frame and try to keep to the target FPS
    cv2.imshow("world", drawer.screen)
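The listing above relies on a few names created beforehand: the Box2D `world`, the `polygonShape` helper, the `TIME_STEP` constant, and the `OpencvDrawFuncs` drawing helper. If you are running it on its own, a minimal setup along the following lines should work; the gravity value and 60 Hz time step are assumptions here, not something the listing itself fixes.

# Hypothetical setup assumed by the listing above; OpencvDrawFuncs is the
# OpenCV drawing helper introduced earlier and is not redefined here.
import cv2
from Box2D import b2World
from Box2D.b2 import polygonShape

TIME_STEP = 1.0 / 60                        # assumed 60 Hz physics step
world = b2World(gravity=(0, -10), doSleep=True)  # standard downward gravity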
class Pusher2d(gym.Env):
    def __init__(self, control_noise=0.):
        self.control_noise = control_noise
        self.seed()
        self.world = Box2D.b2World(gravity=(0, 0))
        self.pusher = None
        self.box = None
        # Actions: x-movement, y-movement (clipped -1 to 1)
        self.action_space = spaces.Box(np.ones(2) * -1, np.ones(2), dtype=np.float32)
        # State: pusher xy position, box xy position, pusher xy velocity,
        # box xy velocity, goal xy position
        self.observation_space = spaces.Box(np.ones(10) * MIN_COORD,
                                            np.ones(10) * MAX_COORD,
                                            dtype=np.float32)
        self.reset()

        self.drawer = OpencvDrawFuncs(w=240, h=180, ppm=40)
        self.drawer.install()

    def seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def random_place(self):
        """ returns [x, y] within an area slightly away from the initial box position """
        return [
            self.np_random.uniform(BOX_START[0] + BOX_RAD + GOAL_RAD,
                                   MAX_COORD - RAD * SIDE_GAP_MULT),
            self.np_random.uniform(BOX_START[1] + BOX_RAD + GOAL_RAD,
                                   MAX_COORD - RAD * SIDE_GAP_MULT)
        ]

    def _destroy(self):
        """ removes instantiated Box2D entities """
        if not self.box:
            return
        self.world.DestroyBody(self.box)
        self.world.DestroyBody(self.pusher)

    def reset(self):
        """ standard Gym method; returns the first state of the episode """
        self._destroy()
        self.pusher = self.world.CreateDynamicBody(
            position=PUSHER_START[:],
            fixtures=fixtureDef(shape=circleShape(radius=RAD, pos=(0, 0)),
                                density=1.0))
        self.box = self.world.CreateDynamicBody(
            position=BOX_START[:],
            fixtures=fixtureDef(shape=circleShape(radius=BOX_RAD, pos=(0, 0)),
                                density=1.0))
        self.goal_pos = self.random_place()
        self.elapsed_steps = 0
        return self._get_obs()

    def step(self, action, render=False):
        """ standard Gym method; returns (state, reward, done, info) """
        if render:
            self.drawer.clear_screen()
            self.drawer.draw_world(self.world)

        action = np.clip(action, -1, 1).astype(np.float32)
        if self.control_noise > 0.:
            action += np.random.normal(0., scale=self.control_noise, size=action.shape)

        self.elapsed_steps += 1
        self.pusher._b2Body__SetLinearVelocity(
            (FORCE_MULT * action[0], FORCE_MULT * action[1]))
        self.box._b2Body__SetActive(True)
        self.world.Step(1.0 / FPS, 6 * 30, 2 * 30)

        if render:
            cv2.imshow("world", self.drawer.screen)
            cv2.waitKey(20)

        done = False
        reward = -1
        obj_coords = np.concatenate(
            [self.pusher.position.tuple, self.box.position.tuple])
        info = {"done": None}
        # check if out of bounds
        if np.min(obj_coords) < MIN_COORD or np.max(obj_coords) > MAX_COORD:
            reward = -1 * (MAX_STEPS - self.elapsed_steps + 2)
            done = True
            info["done"] = "unstable simulation"
        # check if out of time
        elif self.elapsed_steps >= MAX_STEPS:
            done = True
            info["done"] = "max_steps_reached"
        # check if goal reached
        elif np.linalg.norm(np.array(self.box.position.tuple) - self.goal_pos) < RAD + GOAL_RAD:
            done = True
            reward = 0
            info["done"] = "goal reached"

        return self._get_obs(), reward, done, info

    def _get_obs(self):
        """ returns current state of environment """
        state = np.concatenate([
            self.pusher.position.tuple,
            self.box.position.tuple,
            self.pusher.linearVelocity.tuple,
            self.box.linearVelocity.tuple,
            self.goal_pos
        ])
        return state

    def apply_hindsight(self, states, actions, goal_state):
        """ returns list of new states and list of new rewards for use with HER """
        goal = goal_state[2:4]  # new goal location (final location of the box)
        states.append(goal_state)
        num_tuples = len(actions)
        her_states, her_rewards = [], []
        states[0][-2:] = goal.copy()
        her_states.append(states[0])
        # for each state, overwrite the goal and compute the reward obtained
        for i in range(1, num_tuples + 1):
            state = states[i]
            state[-2:] = goal.copy()
            reward = self._HER_calc_reward(state)
            her_states.append(state)
            her_rewards.append(reward)
        return her_states, her_rewards

    def _HER_calc_reward(self, state):
        """ given a state, returns the reward for transitioning to it (used by HER) """
        # compare the box position (state[2:4]) against the relabeled goal (state[-2:])
        if np.linalg.norm(state[2:4] - state[-2:]) < RAD + GOAL_RAD:
            return 0
        else:
            return -1

    def set_state(self, state):
        self.pusher.position = state[:2]
        self.box.position = state[2:4]
        self.pusher.linearVelocity = state[4:6]
        self.box.linearVelocity = state[6:8]
        # the input may also be an 8-dim observation without the goal;
        # only update the goal when it is present
        if len(state) == 10:
            self.goal_pos = state[8:10]

    def get_state(self):
        return copy.copy(self._get_obs())

    def get_nxt_state(self, state, action):
        original_state = self.get_state()
        original_elapsed_steps = self.elapsed_steps

        self.set_state(state)
        nxt_state, _, _, _ = self.step(action)
        nxt_state = nxt_state[:8]

        # restore the original state so the query has no side effects
        self.set_state(original_state)
        self.elapsed_steps = original_elapsed_steps
        return nxt_state
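To make the pieces above concrete, here is a minimal sketch of rolling out one episode and then relabeling it with apply_hindsight for HER. It assumes the Pusher2d class and its constants are in scope; the random policy and the variable names are purely illustrative and not part of the original environment.

# Hypothetical usage sketch: collect an episode, then relabel it with HER.
env = Pusher2d(control_noise=0.05)

state = env.reset()
states, actions = [state], []
done = False
while not done:
    action = env.action_space.sample()            # random policy, for illustration
    next_state, reward, done, info = env.step(action)
    states.append(next_state)
    actions.append(action)

# Use the box's final position as the substitute goal and recompute rewards.
goal_state = states[-1]
her_states, her_rewards = env.apply_hindsight(states[:-1], actions, goal_state)

In a full HER setup, the relabeled (state, action, reward, next-state) tuples would be added to the replay buffer alongside the transitions from the original goal.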