# Assumed imports for this excerpt (not shown in the original source):
import unittest

import gym
import numpy as np

from safety_gym.envs.engine import Engine, ResamplingError


def test_goal(self):
    ''' Point should run into and get a goal '''
    config = {
        'robot_base': 'xmls/point.xml',
        'goal_size': 0.5,
        'goal_placements': [(0, -.5, 5, .5)],
        'reward_goal': 1.0,
        'reward_distance': 1.0,
        'robot_locations': [(0, 0)],
        'robot_rot': 0,
        '_seed': 0,
    }
    env = Engine(config)
    env.reset()
    goal_met = False
    for _ in range(999):
        act = np.zeros(env.action_space.shape)
        act[0] = 1
        _, reward, done, info = env.step(act)
        self.assertFalse(done)
        # If we have not yet gotten the goal
        if not goal_met:
            # Reward should be positive, since we're moving towards it
            self.assertGreater(reward, 0)
        # Update if we got the goal
        if 'goal_met' in info:
            goal_met = info['goal_met']
            # Assert we got at least 1 point for the goal
            self.assertGreater(reward, 1)
        # env.render()  # Uncomment to visualize test
    self.assertTrue(goal_met)
def test_angle_components(self):
    ''' Test that the angle components are about correct '''
    p = Engine({'robot_base': 'xmls/doggo.xml',
                'observation_flatten': False,
                'sensors_angle_components': True,
                'robot_rot': .3})
    p.reset()
    p.step(p.action_space.high)
    p.step(p.action_space.high)
    p.step(p.action_space.low)
    theta = p.data.get_joint_qpos('hip_1_z')
    dtheta = p.data.get_joint_qvel('hip_1_z')
    print('theta', theta)
    print('dtheta', dtheta)
    print('sensordata', p.data.sensordata)
    obs = p._get_obs()
    print('obs', obs)
    x, y = obs['jointpos_hip_1_z']
    dz = obs['jointvel_hip_1_z']
    # x, y components should be a unit vector
    self.assertAlmostEqual(np.sqrt(np.sum(np.square([x, y]))), 1.0)
    # x, y components should be sin/cos of theta
    self.assertAlmostEqual(np.sin(theta), x)
    self.assertAlmostEqual(np.cos(theta), y)
    # dz should be the same as dtheta
    self.assertAlmostEqual(dz, dtheta)
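
# A minimal illustration (not part of the original tests) of the angle
# encoding checked above: a joint angle theta is observed as the pair
# (sin(theta), cos(theta)), which is always a unit vector and avoids the
# 2*pi wraparound discontinuity of raw angles.
_theta = 0.3  # example angle, matching robot_rot above
_x, _y = np.sin(_theta), np.cos(_theta)
assert abs(np.hypot(_x, _y) - 1.0) < 1e-9  # unit norm by construction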
def test_resample(self):
    ''' Episode should end with resampling failure '''
    # Note: a keepout of sqrt(2) exceeds the half-width (1) of the placement
    # extents, so once the first goal is reached no new goal can be placed.
    config = {
        'robot_base': 'xmls/point.xml',
        'num_steps': 1001,
        'placements_extents': [-1, -1, 1, 1],
        'goal_size': 1.414,
        'goal_keepout': 1.414,
        'goal_locations': [(1, 1)],
        'robot_keepout': 1.414,
        'robot_locations': [(-1, -1)],
        'robot_rot': np.sin(np.pi / 4),
        'terminate_resample_failure': True,
        '_seed': 0,
    }
    env = Engine(config)
    env.reset()
    self.assertEqual(env.steps, 0)
    # Move the robot towards the goal
    self.rollout_env(env)
    # Check that the environment terminated early
    self.assertLess(env.steps, 1000)
    # Try again, this time raising an exception on resampling failure
    config['terminate_resample_failure'] = False
    env = Engine(config)
    env.reset()
    # Move the robot towards the goal, which should cause resampling failure
    with self.assertRaises(ResamplingError):
        self.rollout_env(env)
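
# Several tests call a rollout_env() helper defined elsewhere in the
# original test class. A minimal sketch of what such a helper might look
# like (an assumption for readability, not the original implementation):
def rollout_env(self, env, gets_goal=False):
    ''' Drive the robot straight ahead until the episode ends, returning
        the final info dict; optionally require that a goal was met. '''
    goal_met = False
    info = {}
    while not env.done:
        act = np.zeros(env.action_space.shape)
        act[0] = 1  # constant forward action
        _, _, _, info = env.step(act)
        if info.get('goal_met', False):
            goal_met = True
    if gets_goal:
        assert goal_met, 'Expected the rollout to reach a goal'
    return info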
def test_timeout(self):
    ''' Test that episode is over after num_steps '''
    p = Engine({'num_steps': 10})
    p.reset()
    for _ in range(10):
        self.assertFalse(p.done)
        p.step(np.zeros(p.action_space.shape))
    self.assertTrue(p.done)
    with self.assertRaises(AssertionError):
        p.step(np.zeros(p.action_space.shape))
def test_vases(self):
    ''' Point should run into and past a vase, pushing it out of the way '''
    config = {
        'robot_base': 'xmls/point.xml',
        'goal_size': 0.5,
        'goal_placements': [(5, -.5, 10, .5)],
        'reward_goal': 1.0,
        'reward_distance': 1.0,
        'constrain_indicator': True,
        'constrain_vases': True,
        'vases_num': 1,
        'vases_locations': [(2, 0)],
        'vases_contact_cost': 1.0,
        'vases_displace_cost': 1.0,
        'vases_velocity_cost': 1.0,
        'robot_locations': [(0, 0)],
        'robot_rot': 0,
        '_seed': 0,
    }
    env = Engine(config)
    env.reset()
    goal_met = False
    vase_found = False
    for _ in range(999):
        act = np.zeros(env.action_space.shape)
        act[0] = 1
        _, reward, done, info = env.step(act)
        if not vase_found:
            if info['cost']:
                vase_found = True
                self.assertEqual(info['cost'], 1.0)  # Sparse costs
                self.assertGreater(info['cost_vases_contact'], 0.0)  # Nonzero contact cost
                self.assertGreater(info['cost_vases_velocity'], 0.0)  # Nonzero velocity cost
        else:
            # We've already found (and hit) the vase; ensure displacement cost
            self.assertEqual(info['cost'], 1.0)  # Sparse costs
            self.assertGreater(info['cost_vases_displace'], 0.0)  # Nonzero displacement cost
        if 'goal_met' in info:
            goal_met = info['goal_met']
        # env.render()  # Uncomment to visualize test
    self.assertTrue(vase_found)
    self.assertTrue(goal_met)
def test_rotate(self):
    ''' Point should observe compass/lidar differently for different rotations '''
    config = {
        'robot_base': 'xmls/point.xml',
        'observation_flatten': False,
        'observe_sensors': False,
        'observe_remaining': False,
        'observe_goal_lidar': True,
        'observe_goal_comp': True,
        'goal_size': 3,
        'goal_locations': [(5, 0)],
        'robot_locations': [(1, 1)],
        '_seed': 0,
    }
    for s in (2, 3):
        config['compass_shape'] = s
        config['robot_rot'] = 5.3
        env = Engine(config)
        obs0 = env.reset()
        # for _ in range(1000): env.render()
        # print('obs0', obs0)
        config['robot_rot'] = np.pi / 4
        env = Engine(config)
        obs1 = env.reset()
        # for _ in range(1000): env.render()
        # print('obs1', obs1)
        self.assertTrue((obs0['goal_lidar'] != obs1['goal_lidar']).any())
        self.assertTrue((obs0['goal_compass'] != obs1['goal_compass']).any())
def test_hazards(self):
    ''' Point should run into a hazard and incur its cost '''
    config = {
        'robot_base': 'xmls/point.xml',
        'goal_size': 0.5,
        'goal_placements': [(5, -.5, 10, .5)],
        'reward_goal': 1.0,
        'reward_distance': 1.0,
        'constrain_indicator': True,
        'constrain_hazards': True,
        'hazards_num': 1,
        'hazards_size': 0.5,
        'hazards_locations': [(2, 0)],
        'hazards_cost': 1.0,
        'robot_locations': [(0, 0)],
        'robot_rot': 0,
        '_seed': 0,
    }
    env = Engine(config)
    env.reset()
    goal_met = False
    hazard_found = False
    for _ in range(999):
        act = np.zeros(env.action_space.shape)
        act[0] = 1
        _, reward, done, info = env.step(act)
        if not hazard_found:
            if info['cost']:
                hazard_found = True
                self.assertEqual(info['cost'], 1.0)  # Sparse costs
                self.assertGreater(info['cost_hazards'], 0.0)  # Nonzero hazard cost
        if 'goal_met' in info:
            goal_met = info['goal_met']
        # env.render()  # Uncomment to visualize test
    self.assertTrue(hazard_found)
    self.assertTrue(goal_met)
def __init__(self):
    config = {
        'placements_extents': [-1.5, -1.5, 1.5, 1.5],
        'robot_base': 'xmls/point.xml',
        'observe_goal_dist': True,
        'observe_goal_lidar': True,
        # Change these all to True to get cost estimates
        'observe_hazards': True,
        'observe_vases': False,
        'vision_size': (60, 40),
        'vision_render_size': (300, 200),
        'lidar_num_bins': 16,
        'lidar_max_dist': 3,
        'goal_keepout': 0.305,
        'sensors_obs': [],
        'constrain_hazards': True,
        'hazards_num': 8,
        'hazards_keepout': 0.18,
        'hazards_size': 0.2,
        '_seed': 0,
    }
    self.ENV = Engine(config)

    # Settings from cartpole
    cfg = tf.compat.v1.ConfigProto()
    cfg.gpu_options.allow_growth = True
    self.SESS = tf.compat.v1.Session(config=cfg)
    self.NN_TRAIN_CFG = {"epochs": 5}
    self.OPT_CFG = {
        "Random": {
            "popsize": 2000
        },
        "CEM": {
            "popsize": 400,
            "num_elites": 40,
            "max_iters": 5,
            "alpha": 0.1
        },
        "SafeOpt": {
            "swarmsize": 20,
            "max_iters": 10,
            "beta": 2,
            "fmin": -1e10,
        }
    }
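
# A minimal sketch (an assumption, not the optimizer used by this repo) of
# how the CEM hyperparameters above are typically consumed: each iteration
# samples `popsize` candidates, keeps the `num_elites` lowest-cost ones, and
# smooths the sampling distribution with coefficient `alpha`.
def cem_sketch(cost_fn, mean, std, popsize=400, num_elites=40,
               max_iters=5, alpha=0.1):
    for _ in range(max_iters):
        # Sample candidate solutions around the current distribution
        samples = np.random.randn(popsize, *mean.shape) * std + mean
        # Keep the num_elites candidates with the lowest cost
        elites = samples[np.argsort(cost_fn(samples))[:num_elites]]
        # Smooth the distribution update with coefficient alpha
        mean = alpha * mean + (1 - alpha) * elites.mean(axis=0)
        std = alpha * std + (1 - alpha) * elites.std(axis=0)
    return mean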
def test_flatten(self):
    ''' Test that physics can flatten observations '''
    p = Engine({'observation_flatten': True})
    obs = p.reset()
    self.assertIsInstance(p.observation_space, gym.spaces.Box)
    self.assertEqual(len(p.observation_space.shape), 1)
    self.assertTrue(p.observation_space.contains(obs))
    p = Engine({'observation_flatten': False})
    obs = p.reset()
    self.assertIsInstance(p.observation_space, gym.spaces.Dict)
    self.assertTrue(p.observation_space.contains(obs))
def __init__(self, config):
    with open(config["level_file"]) as f:
        env_config = yaml.safe_load(f)
    self.config = env_config

    import safety_gym
    from safety_gym.envs.engine import Engine
    env = Engine(self.config)
    self.env = env
    self.max_steps = env.config['num_steps']

    # Enable video recording features
    # self.metadata = self.env.metadata
    self.action_space = self.env.action_space
    self.observation_space = self.env.observation_space
    self._done = True
    self.done = True
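
# Hypothetical usage sketch (the wrapper class name and YAML path below are
# illustrative assumptions; only the 'level_file' key and Engine config keys
# come from the code above):
#
#   wrapper = SafetyGymEnvWrapper({'level_file': 'levels/point_goal.yaml'})
#   obs = wrapper.env.reset()
#
# where levels/point_goal.yaml would hold an Engine config, e.g.:
#
#   robot_base: xmls/point.xml
#   num_steps: 1000
#   task: goal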
def __init__(self,
             domain,
             task,
             *args,
             env=None,
             normalize=True,
             observation_keys=None,
             unwrap_time_limit=True,
             **kwargs):
    assert not args, (
        "Gym environments don't support args. Use kwargs instead.")

    self.normalize = normalize
    self.observation_keys = observation_keys
    self.unwrap_time_limit = unwrap_time_limit
    self.stacks = 1
    self.stacking_axis = 0

    self._Serializable__initialize(locals())
    super(GymAdapter, self).__init__(domain, task, *args, **kwargs)

    if env is None:
        assert (domain is not None and task is not None), (domain, task)
        env_id = f"{domain}-{task}"
        env = gym.envs.make(env_id, **kwargs)
        # env_id = f""
        # env = gym.make("Safexp-PointGoal1-v0")
    else:
        assert domain is None and task is None, (domain, task)
        env_id = 'custom'

    if isinstance(env, wrappers.TimeLimit) and unwrap_time_limit:
        # Remove the TimeLimit wrapper that sets 'done = True' when the time
        # limit specified for each environment has been passed and therefore
        # the environment is not Markovian (terminal condition depends on
        # time rather than state).
        env = env.env

    if isinstance(env.observation_space, spaces.Dict):
        observation_keys = (
            observation_keys or list(env.observation_space.spaces.keys()))
    if normalize:
        env = NormalizeActionWrapper(env)

    #### --- specifically for safety_gym wrapping --- ####
    if env_id in SAFETY_WRAPPER_IDS:
        dirname, _ = os.path.split(os.path.abspath(__file__))
        #### Load config file
        with open(f'{dirname}/../gym/safety_gym/configs/{env_id}_config.json', 'r') as fp:
            config = json.load(fp)
        # with open(f'{dirname}/../gym/safety_gym/add_configs/{env_id}_add_config.json', 'r') as fp:
        #     add_config = json.load(fp)
        env = Engine(config)
        # env = SAFETY_WRAPPER_IDS[env_id](env)
        #### Additional config info like stacking etc.
        # for k in add_config.keys():
        #     self.safeconfig[k] = add_config[k]
        #### Dump config file to current data dir
        with open(f'{env_id}_config.json', 'w') as fp:
            json.dump(config, fp)
        #### Adding unserializable additional info after dumping (lol)
        # self.obs_indices = env.obs_indices
        # self.safeconfig['obs_indices'] = self.obs_indices
        ### Stack env
        self.stacks = config.get('stacks', 1)
        ### For convenience
        self.stacking_axis = config.get('stacking_axis', 0)
        if self.stacks > 1:
            env = DummyVecEnv([lambda: env])
            # env = VecNormalize(env)  # doesn't work at all for some reason
            env = VecFrameStack(env, self.stacks)
    #### --- end specifically for safety_gym --- ####

    self._env = env
def main(robot, task, algo, seed, exp_name, cpu, hidden_dim=220, stacks=1):
    # Verify experiment
    robot_list = ['point', 'car', 'doggo']
    task_list = ['goal0', 'goal1', 'goal2', 'button1', 'button2', 'push1', 'push2']
    algo_list = ['ppo', 'ppo_lagrangian', 'trpo', 'trpo_lagrangian', 'cpo']

    algo = algo.lower()
    task = task.capitalize()
    robot = robot.capitalize()
    assert algo in algo_list, "Invalid algo"
    assert task.lower() in task_list, "Invalid task"
    assert robot.lower() in robot_list, "Invalid robot"

    # Hyperparameters
    exp_name = algo + '_' + robot + task
    if robot == 'Doggo':
        num_steps = 1e8
        steps_per_epoch = 60000
    else:
        num_steps = 3e7  # 1e7
        steps_per_epoch = 100000  # default 30000

    # config = {
    #     'robot_base': 'xmls/point.xml',
    #     'task': 'push',
    #     'observation_flatten': True,
    #     'observe_goal_lidar': True,
    #     'observe_hazards': True,
    #     'observe_vases': True,
    #     'lidar_num_bins': 16,
    #     'task': 'goal2',
    #     'goal_keepout': 0.305,
    #     'hazards_size': 0.2,
    #     'hazards_keepout': 0.18,
    #     'action_noise': 0.0,
    #     'observe_qpos': True,  # Observe the qpos of the world
    #     'observe_qvel': True,  # Observe the qvel of the robot
    #     'observe_ctrl': True,  # Observe the previous action
    # }

    env_name = 'Safexp-' + robot + task + '-v0'
    with open(f'{env_name}_config.json', 'r') as fp:
        config = json.load(fp)
    env = Engine(config)

    ### For writing the config of an env:
    # env = gym.make(env_name)
    # config = env.config
    # with open(f'{env_name}_config.json', 'w') as fp:
    #     json.dump(config, fp)

    env = SafetyPreprocessedEnv(env)
    env = DummyVecEnv([lambda: env])
    env = VecFrameStack(env, n_stack=stacks)
    p_env = gym.make(env_name)
    # gym.utils.play.play(env, zoom=3)

    actions = [
        # [.5, .3],
        # [.5, -.5],
        # [.5, .7],
        # [-.5, -.3],
        # [-.5, .7],
        # [-1, 1],
        # [-1, .3],
        # [-1, -.5],
        # [.5, -1],
        # [.5, -.7],
        [-0.02, .6],
        [-0.02, -.6],
        [0.02, .6],
        [0.02, -.6],
        [0, -.6],      # testing only for rotation
        [0, .6],       # testing only for rotation
        [0.01, 0.0],   # testing only for x acc
        [-0.01, 0.0],  # testing only for x acc
        # [1, .1],
        # [-.5, -1],
    ]

    clock = pygame.time.Clock()
    manual_control = False
    render = True
    step_on_key = True

    ##### Model construction #####
    _model_train_freq = 10000
    _rollout_freq = 70000
    obs_dim = np.prod(env.observation_space.shape)
    act_dim = np.prod(env.action_space.shape)
    # hidden_dim = 145  # 280; 115 for rotation only | 140 for x only | 140 for x and rot
    num_networks = 7
    num_elites = 5
    sensor_mse_weights = {
        # 'acc': 0.0000,
        # 'gyro': 0.000,
        # 'goal_lidar': 1,
        # 'hazards_lidar': 1,
        # 'vases_lidar': 1,
        # 'velo': 1,
        # 'action': 1,  # this one doesn't matter (comes from action, not obs)
    }
    mse_weights = np.ones(shape=(int(obs_dim / stacks) + 1), dtype='float32')
    mse_weights[-1] = 20  # more weight on reward
    mse_weights[-2] = 20  # more weight on the last obs entry (goal distance)
    # mse_weights[3:19] = 10
    # mse_weights = mse_weights * 100

    # act_dim += 3 because of additional spike info and last action
    model = construct_model(obs_dim=obs_dim,
                            act_dim=act_dim + 3,
                            hidden_dim=hidden_dim,
                            num_networks=num_networks,
                            num_elites=num_elites,
                            mse_weights=mse_weights,
                            stacks=stacks)

    pool = {
        'actions': np.zeros(shape=(_model_train_freq, act_dim + 3), dtype='float32'),
        'action_processed': np.zeros(shape=(_model_train_freq, 1), dtype='float32'),
        'next_observations': np.zeros(shape=(_model_train_freq, obs_dim), dtype='float32'),
        'observations': np.zeros(shape=(_model_train_freq, obs_dim), dtype='float32'),
        'rewards': np.zeros(shape=(_model_train_freq, 1), dtype='float32'),
        'terminals': np.zeros(shape=(_model_train_freq, 1), dtype='bool'),
        'sim_states': np.empty(shape=(_model_train_freq,), dtype=object),
    }
    big_pool = {}

    ## Fake env
    class StaticFns:
        @staticmethod
        def termination_fn(obs, act, next_obs):
            '''
            safeexp-pointgoal (like the other default safety-gym envs) doesn't
            terminate prematurely other than due to sampling errors etc.,
            therefore just return False for every sample in the batch.
            '''
            # assert len(obs.shape) == len(next_obs.shape) == len(act.shape) == 2
            done = np.array([False]).repeat(len(obs))
            done = done[:, None]
            return done

    # Termination functions for the envs (the model can't simulate those)
    static_fns = StaticFns
    fake_env = FakeEnv(model, static_fns, task.lower(), stacks=stacks)
    perturbed_env = PerturbedEnv(p_env, std_inc=0)  # 0.03

    try:
        obs = env.reset()
        obs = np.squeeze(obs)
        action = np.array([0, 0])
        # init_sim_state = env.get_sim_state()
        # p_env.reset(init_sim_state)
        if render:
            rendered = env.render(mode='human')
            video_size = [100, 100]
            screen = pygame.display.set_mode(video_size)

        print("-----Safety Gym Environment is running-----")
        episode = 0
        episodes_successful = 0
        frame = 0
        total_timesteps = 0
        frames_per_episode = 1000
        ready_to_pool = False
        model_train_metrics = None
        rollouts = 0

        file_name = f'pointgoal2_stack{stacks}_noacc_nogyro_goaldist_nofilter'
        load_data = True
        write_data = False
        if load_data:
            with open(file_name, 'rb') as infile:
                big_pool = pickle.load(infile)
            print('---------------')
            print(f'hidden: {hidden_dim}')
            model_train_metrics = _train_model(model,
                                               big_pool,
                                               stacks=stacks,
                                               batch_size=512,
                                               max_epochs=500,
                                               holdout_ratio=0.2)

        while True:
            action_old = action
            if manual_control:
                milliseconds = clock.get_time()
                for event in pygame.event.get():
                    keys = pygame.key.get_pressed()
                    if keys[K_UP]:
                        a = 1.0
                    elif keys[K_DOWN]:
                        a = -1.0
                    else:
                        a = 0
                    if keys[K_LEFT]:
                        s = 0.7
                    elif keys[K_RIGHT]:
                        s = -0.7
                    else:
                        s = 0.0
                    action = np.array([a, s])
            else:
                if total_timesteps % 45 == 0:
                    action = np.array(actions[random.randint(0, len(actions) - 1)])
                    # action = np.array([0.0, 0.2])

            #### Process action for spike prediction
            processed_act = process_act(action[0], action_old[0])
            processed_act = np.concatenate((action, action_old, [processed_act]))

            obs_old = obs
            obs, reward, done, info = env.step([action])
            obs = np.squeeze(obs)

            if model_train_metrics:
                if rollouts == 10:
                    print('restarting rollouts at 0')
                    rollouts = 0
                if rollouts == 0:
                    f_next_obs, f_rew, f_term, f_info = fake_env.step(
                        obs_old, processed_act, deterministic=True)
                else:
                    f_next_obs, f_rew, f_term, f_info = fake_env.step(
                        f_next_obs, processed_act, deterministic=True)
                print('------------------')
                print('predicted obs:')
                print('goal_l: ', f_next_obs[-55:-39])
                print('hazards_l: ', f_next_obs[-39:-23])
                print('vases_l: ', f_next_obs[-23:-7])
                print('magneto: ', f_next_obs[-7:-4])
                print('velo: ', f_next_obs[-4:-1])
                print('goal_dist: ', f_next_obs[-1:])
                print('rew: ', f_rew)
                print('------------------')
                print('real obs: ')
                # NOTE: obs_unprocesseed is defined outside this excerpt
                print('goal_l: ', obs_unprocesseed['goal_lidar'])
                print('hazards_l: ', obs_unprocesseed['hazards_lidar'])
                print('vases_l: ', obs_unprocesseed['vases_lidar'])
                print('magneto_l: ', obs_unprocesseed['magnetometer'])
                print('velo: ', obs_unprocesseed['velocimeter'])
                print('goal_dist: ', obs_unprocesseed['goal_dist'])
                print('rew: ', reward)
                print('#######################################')
                rollouts += 1

            # sim_state = env.get_sim_state()
            if render:
                env.render()

            delta_obs = obs - obs_old

            ## Store samples
            obs_old_s = obs_old
            obs_next_s = obs
            pool['actions'][total_timesteps] = processed_act
            pool['action_processed'][total_timesteps] = processed_act[4]
            pool['next_observations'][total_timesteps] = obs_next_s
            pool['observations'][total_timesteps] = obs_old_s
            pool['rewards'][total_timesteps] = reward
            pool['terminals'][total_timesteps] = done
            # pool['sim_states'][total_timesteps] = sim_state

            frame += 1
            total_timesteps += 1
            clock.tick(300)

            ##### Train model #####
            if total_timesteps % _model_train_freq == 0:
                total_timesteps = 0
                if big_pool:
                    for key in pool:
                        big_pool[key] = np.concatenate((big_pool[key], pool[key]), axis=0)
                else:
                    for key in pool:
                        big_pool[key] = np.copy(pool[key])

                if write_data:
                    with open(file_name, 'wb') as outfile:
                        pickle.dump(big_pool, outfile)
                    return

                #### Train the model with inputs (obs, act) and outputs
                #### (rew, delta_obs); inputs are split into train/holdout
                #### sets according to holdout_ratio.
                # model.reset()  # @anyboby debug
                print('---------------')
                print(f'hidden: {hidden_dim}')
                print(f'stacks: {stacks}')
                model_train_metrics = _train_model(model,
                                                   big_pool,
                                                   stacks=stacks,
                                                   batch_size=512,
                                                   max_epochs=500,
                                                   holdout_ratio=0.2)

            if total_timesteps % _rollout_freq == 0 and model_train_metrics and not done:
                for i in range(0, 5):
                    roll_start_obs = pool['next_observations'][total_timesteps - 1, :]
                    # roll_start_sim = pool['sim_states'][total_timesteps - 1]
                    error_check = np.sum(roll_start_obs - obs)
                    # cur_f = base_next_obs
                    # cur_f = perturbed_env.reset(sim_state=roll_start_sim)
                    # reset_check = np.sum(cur_f - roll_start_obs)
                    cur_r = obs
                    cur_f = cur_r
                    print(f'sum (roll_start_obs - obs): \n {error_check}')
                    # print(f'sum (cur_f - roll_start_obs): \n {reset_check}')
                    for i in range(0, 25):
                        rand_action = np.array(actions[random.randint(0, len(actions) - 1)])
                        if i % 3 == 0:
                            rand_action = np.array(actions[random.randint(0, len(actions) - 1)])
                        processed_act = process_act(rand_action[0], action_old[0])
                        processed_act = np.concatenate((rand_action, action_old, [processed_act]))
                        f_next_obs, f_rew, f_term, f_info = fake_env.step(
                            cur_f, processed_act, deterministic=True)
                        # f_next_obs, f_rew, f_term, f_info = perturbed_env.step(rand_action)
                        r_next_obs, r_rew, r_term, r_info = env.step(rand_action)
                        delta_f = f_next_obs - cur_f
                        delta_r = r_next_obs - cur_r
                        cur_f = f_next_obs
                        cur_r = r_next_obs
                        error = f_next_obs - r_next_obs
                        action_old = action
                        with np.printoptions(precision=3, suppress=True):
                            print("______________________________________________________")
                            print("action: ", action)
                            print(error[0:3])
                            print(delta_f[0:3])
                            print(delta_r[0:3])
                            print('---')
                            print(error[3:19])
                            print(delta_f[3:19])
                            print(delta_r[3:19])
                            print('---')
                            print(error[19:22])
                            print(delta_f[19:22])
                            print(delta_r[19:22])
                            print('---')
                            print(error[22:38])
                            print(delta_f[22:38])
                            print(delta_r[22:38])
                            print('---')
                            print(error[38:41])
                            print(delta_f[38:41])
                            print(delta_r[38:41])
                            print('---')
                            print(error[41:57])
                            print(delta_f[41:57])
                            print(delta_r[41:57])
                            print('---')
                            print(error[57:60])
                            print(delta_f[57:60])
                            print(delta_r[57:60])
                            print('---')
                            print(f_rew - r_rew)
                            print("reward: ", reward)
                            print(f'sum error obs diff: \n {np.sum(error)}')
                            print(info)
                        if f_term or r_term:
                            break

            if frame == frames_per_episode:
                print("Success! 1000 frames reached! ending episode")
                episodes_successful += 1
                done = True
                ready_to_pool = False

            if done:
                # plt.plot(np.arange(100), pool['observations'][0:100, 55], color='blue')  # filtered y-vel
                # plt.plot(np.arange(100), env.obs_replay_vy_filt[0:100], color='red')
                # plt.plot(np.arange(100), pool['observations'][0:100, 1], color='purple')
                # plt.plot(np.arange(100), pool['actions'][0:100, 0], color='blue')
                # plt.plot(np.arange(100), pool['action_processed'][0:100, 0], color='red')
                # plt.plot(np.arange(100), pool['observations'][0:100, 0], color='purple')
                # plt.plot(np.arange(100), env.obs_replay_acc_y_filt[0:100], color='orange')
                # plt.plot(np.arange(100), np.array(env.obs_replay_acc_y_real[0:100]) / 100, color='green')
                # plt.plot(np.arange(100), env.obs_replay_acc_y_real[400:500], color='green')
                plt.show()
                frame = 0
                episode += 1
                print("episode {} done".format(episode))
                # env.reset(init_sim_state)
                env.reset()
    finally:
        env.close()
        print("-----Safety Gym Environment is closed-----")
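
# For reference, the slice indices printed in the rollout-comparison code
# above imply the following layout for the tail of the flattened
# (single-stack) observation. This is inferred from the slices, not stated
# in the original source:
#
#   obs[-55:-39]  goal_lidar     (16 bins)
#   obs[-39:-23]  hazards_lidar  (16 bins)
#   obs[-23:-7]   vases_lidar    (16 bins)
#   obs[-7:-4]    magnetometer   (3,)
#   obs[-4:-1]    velocimeter    (3,)
#   obs[-1:]      goal_dist      (1,)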
def test_timer(self):
    ''' Buttons should wait a period before becoming active again '''
    config = {
        'robot_base': 'xmls/point.xml',
        'num_steps': 100,
        'task': 'button',
        'buttons_num': 2,
        'buttons_locations': [(0, 0), (1, 0)],
        'buttons_resampling_delay': 1000,
        'constrain_buttons': True,
        'constrain_indicator': True,
        'robot_locations': [(-1, 0)],
        'robot_rot': 0,
        '_seed': 0,
    }
    # Correct button is pressed, nothing afterwards
    env = Engine(config)
    env.reset()
    info = self.rollout_env(env, gets_goal=True)
    self.assertEqual(info['cost_buttons'], 0.0)
    # Correct button is pressed, then times out and incurs penalties
    config['buttons_resampling_delay'] = 10
    env = Engine(config)
    env.reset()
    info = self.rollout_env(env, gets_goal=True)
    self.assertEqual(info['cost_buttons'], 1.0)
    # Wrong button is pressed, gets penalty
    config['_seed'] = 1
    env = Engine(config)
    env.reset()
    info = self.rollout_env(env)
    self.assertEqual(info['cost_buttons'], 1.0)