def __init__(self, verbose=2, locals=locals, globals=globals): super(ActionMaskCallback, self).__init__(verbose) self.model = None # type: BaseRLModel self.training_env = None # type: Union[gym.Env, VecEnv, None] self.n_calls = 0 # Number of time the callback was called # type: int self.num_timesteps = 0 # type: int self.locals = None # type: Dict[str, Any] self.globals = None # type: Dict[str, Any] # Set all actions to valid. self.action_space = MultiDiscrete([3, 21, 21, 21, 21]) self.action_mask = mask(self.action_space) self.action_mask[0] = [1, 0, 0] states = ['start', 'start_spring_forward', 'lift_leg', 'plant_leg', 'switch_leg'] self.gait = Walk() self.gait.num_timesteps = self.num_timesteps self.gait.action_mask = mask(self.action_space) self.gait.action_mask[0] = [1, 0, 0] self.gait.swinging_leg = 'right' self.gait.terminal = False self.gait.start_spring_forward_reward = 0 self.gait.start = True self.gait.step_count = 0 self.gait.step_flag = False self.gait.log = False self.gait.starting_position = 4 self.gait.teaching = None machine = Machine(self.gait, states=states, send_event=True, initial='start') # Setup for Pure Selector Orchestration machine.add_transition('move', 'start', 'plant_leg', conditions='brain_transition_plant_leg', prepare=['log_iteration', 'set_mask'], before=['reset_iter_counter'], after=['set_mask']) machine.add_transition('move', 'lift_leg', 'plant_leg', conditions='brain_transition_plant_leg', unless='is_start', prepare=['log_iteration', 'set_mask'], before=['reset_iter_counter'], after=['set_mask']) machine.add_transition('move', 'plant_leg', 'switch_leg', conditions='is_swinging_leg_planted', prepare=['log_iteration', 'set_mask'], before=['reset_iter_counter', 'switch_legs', 'increment_step_count'], after=['set_mask']) #machine.add_transition('move', 'switch_leg', 'lift_leg', conditions='brain_transition_lift_leg', prepare=['log_iteration', 'set_mask'], before=['reset_iter_counter']) machine.add_transition('move', 'switch_leg', 'plant_leg', conditions='brain_transition_lift_leg', prepare=['log_iteration', 'set_mask'], before=['reset_iter_counter'], after=['set_mask']) machine.add_transition('reset', 'start', 'start', before=['reinstate', 'reset_step_counter', 'set_mask']) machine.add_transition('reset', 'plant_leg', 'start', before=['reinstate', 'reset_step_counter', 'set_mask']) machine.add_transition('reset', 'switch_leg', 'start', before=['reinstate', 'reset_step_counter', 'set_mask']) machine.add_transition('reset', 'lift_leg', 'start', before=['reinstate', 'reset_step_counter', 'set_mask']) #print('callback init') """
def _on_training_start(self) -> None: """ This method is called before the first rollout starts. """ #print('train start') self.engine.valid_actions = mask(self.action_space) self.engine.valid_actions[0] = [1, 0, 0] # Set to start in gait phase 1. self.engine.action = 1 # Query the state to pass to def facts. sim = self.training_env.env_method('get_state') #print(sim[0]['state'][8], sim[0]['state'][13], sim[0]['action']) left_leg = { 'side': 'left', 'contact': sim[0]['state'][8], 'position': sim[0]['legs'][1].position[0], 'hip_angle': sim[0]['state'][4], 'knee_angle': sim[0]['state'][6], 'hip_height': sim[0]['state'][26], 'knee_height': sim[0]['state'][24] } right_leg = { 'side': 'right', 'contact': sim[0]['state'][13], 'position': sim[0]['legs'][3].position[0], 'hip_angle': sim[0]['state'][9], 'knee_angle': sim[0]['state'][11], 'hip_height': sim[0]['state'][27], 'knee_height': sim[0]['state'][25] } self.engine.reset(action=sim[0]['action'], left_position=left_leg['position'], left_contact=left_leg['contact'], \ right_position=right_leg['position'], right_contact=right_leg['contact'], \ left_hip_angle=left_leg['hip_angle'], left_knee_angle=left_leg['knee_angle'], \ right_hip_angle=right_leg['hip_angle'], right_knee_angle=right_leg['knee_angle'], \ left_hip_height=left_leg['hip_height'], right_hip_height=right_leg['hip_height'], \ left_knee_height=left_leg['knee_height'], right_knee_height=right_leg['knee_height']) #self.mask(self.action_space)() #self.engine.action = sim[0]['action'] self.engine.run() self.action_mask = self.engine.valid_actions self.training_env.env_method('set_infos', self.engine.valid_actions) return True
def __init__(self, verbose=2, locals=locals, globals=globals): super(ActionMaskCallback, self).__init__(verbose) # Those variables will be accessible in the callback # (they are defined in the base class) # The RL model self.model = None # type: BaseRLModel # An alias for self.model.get_env(), the environment used for training self.training_env = None # type: Union[gym.Env, VecEnv, None] # Number of time the callback was called self.n_calls = 0 # type: int self.num_timesteps = 0 # type: int # local and global variables self.locals = None # type: Dict[str, Any] self.globals = None # type: Dict[str, Any] # The logger object, used to report things in the terminal # self.logger = None # type: logger.Logger # # Sometimes, for event callback, it is useful # # to have access to the parent object # self.parent = None # type: Optional[BaseCallback] self.engine = Machine_Teaching() self.action_mask = None # Set all actions to valid. self.action_space = MultiDiscrete([3, 21, 21, 21, 21]) self.engine.action = 1 # gait phase instead of reading list, starts with 1, [] self.engine.valid_actions = mask(self.action_space) self.engine.valid_actions[0] = [1, 0, 0] # Set to start in gait phase 1. self.engine.value_list = [ -1, -0.9, -0.8, -0.7, -0.6, -0.5, -0.4, -0.3, -0.2, -0.1, 0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1 ] #self.engine.valid_actions[0] = [1, 0, 0] self.engine.mask_left_hip = [] self.engine.mask_left_knee = [] self.engine.mask_right_hip = [] self.engine.mask_right_knee = [] self.engine.mask_width = 1 self.engine.sim = {} self.engine.possible_states = []
def reset(self): self.valid_actions = mask(self.action_space) self.valid_actions[0] = [1, 0, 0] self.terminal = False self._destroy() self.world.contactListener_bug_workaround = ContactDetector(self) self.world.contactListener = self.world.contactListener_bug_workaround self.game_over = False self.prev_shaping = None self.scroll = 0.0 self.lidar_render = 0 W = VIEWPORT_W / SCALE H = VIEWPORT_H / SCALE self._generate_terrain(self.hardcore) self._generate_clouds() init_x = TERRAIN_STEP * TERRAIN_STARTPAD / 2 init_y = TERRAIN_HEIGHT + 2 * LEG_H self.hull = self.world.CreateDynamicBody(position=(init_x, init_y), fixtures=HULL_FD) self.hull.color1 = (0.5, 0.4, 0.9) self.hull.color2 = (0.3, 0.3, 0.5) self.hull.ApplyForceToCenter( (self.np_random.uniform(-INITIAL_RANDOM, INITIAL_RANDOM), 0), True) self.legs = [] self.joints = [] for i in [ -1, +1 ]: # Original position is right leg slightly forward, left leg slightly back. leg = self.world.CreateDynamicBody( position=(init_x, init_y - LEG_H / 2 - LEG_DOWN), angle=(i * 0.15), # *0.05 fixtures=LEG_FD) leg.color1 = (0.6 - i / 10., 0.3 - i / 10., 0.5 - i / 10.) leg.color2 = (0.4 - i / 10., 0.2 - i / 10., 0.3 - i / 10.) rjd = revoluteJointDef( bodyA=self.hull, bodyB=leg, localAnchorA=(0, LEG_DOWN), localAnchorB=(0, LEG_H / 2), enableMotor=True, enableLimit=True, maxMotorTorque=MOTORS_TORQUE, motorSpeed=i, lowerAngle=-0.8, upperAngle=1.1, ) self.legs.append(leg) self.joints.append(self.world.CreateJoint(rjd)) lower = self.world.CreateDynamicBody( position=(init_x, init_y - LEG_H * 3 / 2 - LEG_DOWN), angle=(i * 0.15), # *0.05 fixtures=LOWER_FD) lower.color1 = (0.6 - i / 10., 0.3 - i / 10., 0.5 - i / 10.) lower.color2 = (0.4 - i / 10., 0.2 - i / 10., 0.3 - i / 10.) rjd = revoluteJointDef( bodyA=leg, bodyB=lower, localAnchorA=(0, -LEG_H / 2), localAnchorB=(0, LEG_H / 2), enableMotor=True, enableLimit=True, maxMotorTorque=MOTORS_TORQUE, motorSpeed=1, lowerAngle=-1.6, upperAngle=-0.1, ) lower.ground_contact = False self.legs.append(lower) self.joints.append(self.world.CreateJoint(rjd)) self.drawlist = self.terrain + self.legs + [self.hull] self.counter = 0 #print('in reset', self.counter) class LidarCallback(Box2D.b2.rayCastCallback): def ReportFixture(self, fixture, point, normal, fraction): if (fixture.filterData.categoryBits & 1) == 0: return -1 self.p2 = point self.fraction = fraction return fraction self.lidar = [LidarCallback() for _ in range(10)] self.terminal = False self.action = [] self.state = {} empty_observation = [init_x, init_x, init_x, init_x, init_x] self.position_history = deque(empty_observation) self.state_history = deque([None, None, None, None, None]) self.leg_history = deque([ (self.legs[0].position[0], self.legs[2].position[0]), (self.legs[0].position[0], self.legs[2].position[0]), (self.legs[0].position[0], self.legs[2].position[0]), (self.legs[0].position[0], self.legs[2].position[0]), (self.legs[0].position[0], self.legs[2].position[0]) ]) if self.state_machine is not None and self.state_machine.state != 'start': self.state_machine.reset() # reset the state of the state machine. self.state_machine.num_timesteps = 0 # reset the iteration counter. self.state_machine.swinging_leg = 'right' self.state_machine.start = True self.state_machine.action_mask = mask(self.action_space) #print('resetting state machine') self.state_machine.action_mask[0] = [1, 0, 0] #print('reset ', self.state_machine.num_timesteps) elif self.state_machine is not None: self.state_machine.num_timesteps = 0 # reset the iteration counter. self.state_machine.swinging_leg = 'right' #print('reset ', self.state_machine.num_timesteps) return self.step(np.array([0, 10, 10, 10, 10]))[0]
import imageio from gym.spaces import MultiDiscrete import stable_baselines.common.walker_action_mask as walker from stable_baselines import PPO2, A2C from stable_baselines.common.evaluation import evaluate_policy from stable_baselines.common.vec_env import DummyVecEnv from curiosity_mask.util import create_dummy_action_mask as mask # Create environment. env = DummyVecEnv([walker.BipedalWalker]) brain = PPO2.load("walker_brain_1a") # Evaluate the agent. #mean_reward, n_steps = evaluate_policy(brain, brain.get_env(), n_eval_episodes=1) #print(mean_reward) action_space = MultiDiscrete([3, 21, 21, 21, 21]) action_mask = mask(action_space) # Predict from the trained agent and record animated gif. #images = [] obs, done, action_masks = env.reset(), [False], [] for i in range(1000): action, _states = brain.predict(obs, action_mask=action_masks) obs, rewards, dones, info = env.step(action) env.render() #images.append(env.render(mode='rgb_array')) #imageio.mimsave('./logs/walker-ppo2-curiosity-mask.gif', images, fps=29)
def __init__(self, verbose=2, locals=locals, globals=globals): super(ActionMaskCallback, self).__init__(verbose) # Those variables will be accessible in the callback # (they are defined in the base class) # The RL model self.model = None # type: BaseRLModel # An alias for self.model.get_env(), the environment used for training self.training_env = None # type: Union[gym.Env, VecEnv, None] # Number of time the callback was called self.n_calls = 0 # type: int self.num_timesteps = 0 # type: int # local and global variables self.locals = None # type: Dict[str, Any] self.globals = None # type: Dict[str, Any] # The logger object, used to report things in the terminal # self.logger = None # type: logger.Logger # # Sometimes, for event callback, it is useful # # to have access to the parent object # self.parent = None # type: Optional[BaseCallback] # Set all actions to valid. self.action_space = MultiDiscrete([3, 21, 21, 21, 21]) self.action_mask = mask(self.action_space) self.action_mask[0] = [1, 0, 0] states = [ 'lift_leg', 'fuzzy_transition_1', 'plant_leg', 'switch_leg', 'fuzzy_transition_2' ] # Fuzzy transition 1 determines how long to lift swinging leg before planting it. # Fuzzy transition 2 determines how long to swing switched leg before lifing it. self.gait = Walk() self.gait.num_timesteps = self.num_timesteps self.gait.action_mask = mask(self.action_space) self.gait.action_mask[0] = [1, 0, 0] self.gait.swinging_leg = 'right' self.gait.terminal = False self.gait.start = True machine = Machine(self.gait, states=states, send_event=True, initial='start') #machine.add_transition('move', 'lift_leg', 'plant_leg', conditions='is_swinging_leg_lifted', prepare='set_mask_gait_1', before=['set_mask_gait_2', 'reset_step_counter']) #machine.add_transition('move', 'plant_leg', 'switch_leg', conditions='is_swinging_leg_planted', prepare='set_mask_gait_2', before=['set_mask_gait_2', 'reset_step_counter']) #machine.add_transition('move', 'switch_leg', 'lift_leg', conditions='is_swinging_leg_leading', prepare='set_mask_gait_3', before=['set_mask_gait_2', 'reset_step_counter']) machine.add_transition('move', 'start', 'fuzzy_transition_1', conditions='is_swinging_leg_lifted', prepare='set_mask_start', before=['set_mask_start', 'reset_step_counter']) machine.add_transition( 'move', 'lift_leg', 'fuzzy_transition_1', conditions='is_swinging_leg_lifted', unless='is_start', prepare='set_mask_gait_1', before=['set_mask_gait_1', 'reset_step_counter']) machine.add_transition( 'move', 'fuzzy_transition_1', 'plant_leg', conditions='done_lifting_swinging_leg', prepare=['set_mask_gait_1', 'set_fuzzy_decision1'], before=['terminate', 'reset_step_counter', 'set_mask_gait_2']) machine.add_transition( 'move', 'plant_leg', 'switch_leg', conditions='is_swinging_leg_planted', prepare='set_mask_gait_2', before=['reset_step_counter', 'switch_legs', 'set_mask_gait_3']) machine.add_transition('move', 'switch_leg', 'fuzzy_transition_2', conditions='is_swinging_leg_leading', prepare='set_mask_gait_3', before=['set_mask_gait_3']) machine.add_transition( 'move', 'fuzzy_transition_2', 'lift_leg', conditions='start_lifting_swinging_leg', prepare=['set_mask_gait_3', 'set_fuzzy_decision2'], before=['reset_step_counter', 'set_mask_gait_1']) machine.add_transition('reset', 'fuzzy_transition_1', 'start', before=['reinstate', 'set_mask_gait_1']) machine.add_transition('reset', 'plant_leg', 'start', before=['reinstate', 'set_mask_gait_1']) machine.add_transition('reset', 'switch_leg', 'start', before=['reinstate', 'set_mask_gait_1']) machine.add_transition('reset', 'fuzzy_transition_2', 'start', before=['reinstate', 'set_mask_gait_1']) machine.add_transition('reset', 'lift_leg', 'start', before=['reinstate', 'set_mask_gait_1']) #print('callback init') self.ui = UI() self.ui.event, self.ui.values = self.ui.window.read() if self.ui.event == 'Cancel': self.ui.window.close() elif self.ui.event == 'Reset': self.training_env.env_method('reset') elif self.ui.event in (None, 'Submit'): self.gait.teaching = self.translate_ui(self.ui.values)
def set_mask(self, event): """ if event.kwargs.get('action') is not None and len(event.kwargs.get('action')) == 5: gait = event.kwargs.get('action')[0] else: gait = 0 #""" if event.kwargs.get('action') is None: gait = 0 #print(self.state) if self.state == 'start' or self.state == 'lift_leg': gait = 0 gait_action_mask = [1, 0, 0] if self.state == 'plant_leg': gait = 1 gait_action_mask = [0, 1, 0] if self.state == 'switch_leg': gait = 2 gait_action_mask = [0, 0, 1] teaching = self.teaching # event.kwargs.get('teaching') gait_ref = str(gait+1) #gait_action_mask = self.action_mask[0] self.action_mask = mask(MultiDiscrete([3, 21, 21, 21, 21])) self.action_mask[0] = gait_action_mask # Flip bits to teach the strategy for this gait. (Left Hip, Left Knee, Right, Hip, Right Knee) start_crouch_length = 6 switch_crouch_length = 8 if self.swinging_leg == 'left': ranges = {'left_hip': {'min': teaching['gait-' + gait_ref + '-swinging-hip-min'], 'max': teaching['gait-' + gait_ref + '-swinging-hip-max']}, 'left_knee': {'min': teaching['gait-' + gait_ref + '-swinging-knee-min'], 'max': teaching['gait-' + gait_ref + '-swinging-knee-max']}, 'right_hip': {'min': teaching['gait-' + gait_ref + '-planted-hip-min'], 'max': teaching['gait-' + gait_ref + '-planted-hip-max']}, 'right_knee': {'min': teaching['gait-' + gait_ref + '-planted-knee-min'], 'max': teaching['gait-' + gait_ref + '-planted-knee-max']} } #""" if ((self.state == 'start' or self.state == 'lift_leg') and self.num_timesteps <= start_crouch_length) or (self.state == 'switch_leg' and self.num_timesteps <= switch_crouch_length): ranges['right_hip']['min'] = 10 if ((self.state == 'start' or self.state == 'lift_leg') and self.num_timesteps > start_crouch_length) or (self.state == 'switch_leg' and self.num_timesteps > switch_crouch_length): ranges['right_hip']['max'] = 10 #""" self.action_mask = gait_mask(gait, ranges, self.action_mask) if self.swinging_leg == 'right': ranges = {'left_hip': {'min': teaching['gait-' + gait_ref + '-planted-hip-min'], 'max': teaching['gait-' + gait_ref + '-planted-hip-max']}, 'left_knee': {'min': teaching['gait-' + gait_ref + '-planted-knee-min'], 'max': teaching['gait-' + gait_ref + '-planted-knee-max']}, 'right_hip': {'min': teaching['gait-' + gait_ref + '-swinging-hip-min'], 'max': teaching['gait-' + gait_ref + '-swinging-hip-max']}, 'right_knee': {'min': teaching['gait-' + gait_ref + '-swinging-knee-min'], 'max': teaching['gait-' + gait_ref + '-swinging-knee-max']} } #""" if ((self.state == 'start' or self.state == 'lift_leg') and self.num_timesteps <= start_crouch_length) or (self.state == 'switch_leg' and self.num_timesteps <= switch_crouch_length): ranges['left_hip']['min'] = 10 if ((self.state == 'start' or self.state == 'lift_leg') and self.num_timesteps > start_crouch_length) or (self.state == 'switch_leg' and self.num_timesteps > switch_crouch_length): ranges['left_hip']['max'] = 10 #""" #print('gait mask details: ', gait, ranges, self.action_mask[1]) self.action_mask = gait_mask(gait, ranges, self.action_mask)