Example #1
 def test_goal(self):
     ''' Point should run into and get a goal '''
     config = {
         'robot_base': 'xmls/point.xml',
         'goal_size': 0.5,
         'goal_placements': [(0, -.5, 5, .5)],
         'reward_goal': 1.0,
         'reward_distance': 1.0,
         'robot_locations': [(0, 0)],
         'robot_rot': 0,
         '_seed': 0,
     }
     env = Engine(config)
     env.reset()
     goal_met = False
     for _ in range(999):
         act = np.zeros(env.action_space.shape)
         act[0] = 1
         _, reward, done, info = env.step(act)
         self.assertFalse(done)
         # If we have not yet got the goal
         if not goal_met:
             # Reward should be positive, since we're moving towards it.
             self.assertGreater(reward, 0)
             # Update if we got the goal
             if 'goal_met' in info:
                 goal_met = info['goal_met']
                 # Assert we got 1 point for the goal
                 self.assertGreater(reward, 1)
         # env.render()  # Uncomment to visualize test
     self.assertTrue(goal_met)
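A config dict like the one above can also be registered as a regular Gym environment id. A minimal sketch, assuming the Engine entry point path from the import used in Example #10 below; the id 'SafexpCustomGoal-v0' and the config values are made up for illustration:

from gym.envs.registration import register

# Hypothetical custom config; keys follow the examples above
config = {
    'robot_base': 'xmls/point.xml',
    'goal_size': 0.5,
    'reward_goal': 1.0,
    'reward_distance': 1.0,
}

# Register the Engine under a made-up id so gym.make() can build it
register(id='SafexpCustomGoal-v0',
         entry_point='safety_gym.envs.engine:Engine',
         kwargs={'config': config})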
Example #2
 def test_angle_components(self):
     ''' Test that the angle components are about correct '''
     p = Engine({'robot_base': 'xmls/doggo.xml',
                  'observation_flatten': False,
                  'sensors_angle_components': True,
                  'robot_rot': .3})
     p.reset()
     p.step(p.action_space.high)
     p.step(p.action_space.high)
     p.step(p.action_space.low)
     theta = p.data.get_joint_qpos('hip_1_z')
     dtheta = p.data.get_joint_qvel('hip_1_z')
     print('theta', theta)
     print('dtheta', dtheta)
     print('sensordata', p.data.sensordata)
     obs = p._get_obs()
     print('obs', obs)
     x, y = obs['jointpos_hip_1_z']
     dz = obs['jointvel_hip_1_z']
     # x, y components should be unit vector
     self.assertAlmostEqual(np.sqrt(np.sum(np.square([x, y]))), 1.0)
     # x, y components should be sin/cos theta
     self.assertAlmostEqual(np.sin(theta), x)
     self.assertAlmostEqual(np.cos(theta), y)
     # dz should be the same as dtheta
     self.assertAlmostEqual(dz, dtheta)
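The (x, y) pair checked above is a unit-vector encoding of the joint angle, so the angle can be recovered with arctan2. A minimal numpy sketch, assuming the ordering the assertions use (x = sin(theta), y = cos(theta)):

import numpy as np

def angle_from_components(x, y):
    # arctan2(sin, cos) gives back theta in (-pi, pi]
    return np.arctan2(x, y)

theta = 0.3
x, y = np.sin(theta), np.cos(theta)
assert np.isclose(angle_from_components(x, y), theta)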
Example #3
    def test_resample(self):
        ''' Episode should end with resampling failure '''
        config = {
            'robot_base': 'xmls/point.xml',
            'num_steps': 1001,
            'placements_extents': [-1, -1, 1, 1],
            'goal_size': 1.414,
            'goal_keepout': 1.414,
            'goal_locations': [(1, 1)],
            'robot_keepout': 1.414,
            'robot_locations': [(-1, -1)],
            'robot_rot': np.sin(np.pi / 4),
            'terminate_resample_failure': True,
            '_seed': 0,
        }
        env = Engine(config)
        env.reset()
        self.assertEqual(env.steps, 0)
        # Move the robot towards the goal
        self.rollout_env(env)
        # Check that the environment terminated early
        self.assertLess(env.steps, 1000)

        # Try again with terminate_resample_failure disabled, so the failure raises instead
        config['terminate_resample_failure'] = False
        env = Engine(config)
        env.reset()
        # Move the robot towards the goal, which should cause resampling failure
        with self.assertRaises(ResamplingError):
            self.rollout_env(env)
Example #4
 def test_timeout(self):
     ''' Test that episode is over after num_steps '''
     p = Engine({'num_steps': 10})
     p.reset()
     for _ in range(10):
         self.assertFalse(p.done)
         p.step(np.zeros(p.action_space.shape))
     self.assertTrue(p.done)
     with self.assertRaises(AssertionError):
         p.step(np.zeros(p.action_space.shape))
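The timeout test implies the usual rollout pattern: once done is set after num_steps, the episode must be reset before stepping again (stepping a finished episode asserts). A minimal sketch using the same config:

import numpy as np
from safety_gym.envs.engine import Engine

env = Engine({'num_steps': 10})
env.reset()
for _ in range(25):
    if env.done:
        # Episode hit num_steps; reset instead of stepping a finished episode
        env.reset()
    env.step(np.zeros(env.action_space.shape))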
Example #5
 def test_vases(self):
     ''' Point should run into and past a vase, pushing it out of the way '''
     config = {
         'robot_base': 'xmls/point.xml',
         'goal_size': 0.5,
         'goal_placements': [(5, -.5, 10, .5)],
         'reward_goal': 1.0,
         'reward_distance': 1.0,
         'constrain_indicator': True,
         'constrain_vases': True,
         'vases_num': 1,
         'vases_locations': [(2, 0)],
         'vases_contact_cost': 1.0,
         'vases_displace_cost': 1.0,
         'vases_velocity_cost': 1.0,
         'robot_locations': [(0, 0)],
         'robot_rot': 0,
         '_seed': 0,
     }
     env = Engine(config)
     env.reset()
     goal_met = False
     vase_found = False
     for _ in range(999):
         act = np.zeros(env.action_space.shape)
         act[0] = 1
         _, reward, done, info = env.step(act)
         if not vase_found:
             if info['cost']:
                 vase_found = True
                 self.assertEqual(info['cost'], 1.0)  # Sparse costs
                 self.assertGreater(info['cost_vases_contact'],
                                    0.0)  # Nonzero vase cost
                 self.assertGreater(info['cost_vases_velocity'],
                                    0.0)  # Nonzero vase cost
         else:
             # We've already found the vase (and hit it), ensure displace cost
             self.assertEqual(info['cost'], 1.0)  # Sparse costs
             self.assertGreater(info['cost_vases_displace'],
                                0.0)  # Nonzero vase cost
         if 'goal_met' in info:
             goal_met = info['goal_met']
         # env.render()  # Uncomment to visualize test
     self.assertTrue(vase_found)
     self.assertTrue(goal_met)
Example #6
 def test_rotate(self):
     ''' Point should observe compass/lidar differently for different rotations '''
     config = {
         'robot_base': 'xmls/point.xml',
         'observation_flatten': False,
         'observe_sensors': False,
         'observe_remaining': False,
         'observe_goal_lidar': True,
         'observe_goal_comp': True,
         'goal_size': 3,
         'goal_locations': [(5, 0)],
         'robot_locations': [(1, 1)],
         '_seed': 0,
     }
     for s in (2, 3):
         config['compass_shape'] = s
         config['robot_rot'] = 5.3
         env = Engine(config)
         obs0 = env.reset()
         # for _ in range(1000): env.render()
         # print('obs0', obs0)
         config['robot_rot'] = np.pi / 4
         env = Engine(config)
         obs1 = env.reset()
         # for _ in range(1000): env.render()
         # print('obs1', obs1)
         self.assertTrue((obs0['goal_lidar'] != obs1['goal_lidar']).any())
         self.assertTrue(
             (obs0['goal_compass'] != obs1['goal_compass']).any())
Example #7
 def test_hazards(self):
     ''' Point should run into and get a hazard '''
     config = {
         'robot_base': 'xmls/point.xml',
         'goal_size': 0.5,
         'goal_placements': [(5, -.5, 10, .5)],
         'reward_goal': 1.0,
         'reward_distance': 1.0,
         'constrain_indicator': True,
         'constrain_hazards': True,
         'hazards_num': 1,
         'hazards_size': 0.5,
         'hazards_locations': [(2, 0)],
         'hazards_cost': 1.0,
         'robot_locations': [(0, 0)],
         'robot_rot': 0,
         '_seed': 0,
     }
     env = Engine(config)
     env.reset()
     goal_met = False
     hazard_found = False
     for _ in range(999):
         act = np.zeros(env.action_space.shape)
         act[0] = 1
         _, reward, done, info = env.step(act)
         if not hazard_found:
             if info['cost']:
                 hazard_found = True
                 self.assertEqual(info['cost'], 1.0)  # Sparse costs
                 self.assertGreater(info['cost_hazards'],
                                    0.0)  # Nonzero hazard cost
         if 'goal_met' in info:
             goal_met = info['goal_met']
         # env.render()  # Uncomment to visualize test
     self.assertTrue(hazard_found)
     self.assertTrue(goal_met)
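The per-step constraint signal checked above is reported in info['cost']; constrained-RL code typically sums it into an episode cost alongside the return. A minimal sketch (the short config and random-action loop here are illustrative, not from the test):

import numpy as np
from safety_gym.envs.engine import Engine

env = Engine({'robot_base': 'xmls/point.xml',
              'constrain_hazards': True,
              'hazards_num': 1})
env.reset()
ep_return, ep_cost = 0.0, 0.0
for _ in range(100):
    obs, reward, done, info = env.step(env.action_space.sample())
    ep_return += reward
    ep_cost += info.get('cost', 0.0)  # sparse or dense depending on constrain_indicator
    if done:
        break
print('return:', ep_return, 'cost:', ep_cost)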
Example #8
    def __init__(self):

        config = {
            'placements_extents': [-1.5,-1.5, 1.5, 1.5],
            'robot_base': 'xmls/point.xml',
            'observe_goal_dist': True,
            'observe_goal_lidar': True,
            # Change these all to true to get cost estimates
            'observe_hazards': True,
            'observe_vases': False,

            'vision_size': (60, 40),
            'vision_render_size': (300, 200),
            'lidar_num_bins': 16,
            'lidar_max_dist': 3,
            'goal_keepout': 0.305,
            'sensors_obs': [],
            'constrain_hazards': True,
            'hazards_num': 8,
            'hazards_keepout': 0.18,
            'hazards_size': 0.2,
            '_seed': 0
        }
        self.ENV = Engine(config)

        #settings from cartpole
        cfg = tf.compat.v1.ConfigProto()
        cfg.gpu_options.allow_growth = True
        self.SESS = tf.compat.v1.Session(config=cfg)
        self.NN_TRAIN_CFG = {"epochs": 5}
        self.OPT_CFG = {
            "Random": {
                "popsize": 2000
            },
            "CEM": {
                "popsize": 400,
                "num_elites": 40,
                "max_iters": 5,
                "alpha": 0.1
            },
            "SafeOpt":{
                "swarmsize":20,
                "max_iters":10,
                "beta":2,
                "fmin": -1e10,
            }
        }
Example #9
    def test_flatten(self):
        ''' Test that physics can flatten observations '''
        p = Engine({'observation_flatten': True})
        obs = p.reset()
        self.assertIsInstance(p.observation_space, gym.spaces.Box)
        self.assertEqual(len(p.observation_space.shape), 1)
        self.assertTrue(p.observation_space.contains(obs))

        p = Engine({'observation_flatten': False})
        obs = p.reset()
        self.assertIsInstance(p.observation_space, gym.spaces.Dict)
        self.assertTrue(p.observation_space.contains(obs))
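Since observation_flatten switches the observation space between a Box and a Dict, downstream code may need to handle both layouts. A minimal sketch using gym's space-flattening utility (available in recent gym versions); the helper name is made up:

import gym
import numpy as np

def as_flat_vector(space, obs):
    # Dict observations are flattened with gym's utility; Box observations pass through
    if isinstance(space, gym.spaces.Dict):
        return gym.spaces.flatten(space, obs)
    return np.asarray(obs, dtype=np.float32)

# e.g. vec = as_flat_vector(p.observation_space, p.reset())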
Example #10
    def __init__(self, config):
        with open(config["level_file"]) as f:
            env_config = yaml.safe_load(f)

        self.config = env_config
    
        import safety_gym
        from safety_gym.envs.engine import Engine
        env = Engine(self.config)

        self.env = env
        self.max_steps = env.config['num_steps']
        # Enable video recording features
        # self.metadata = self.env.metadata

        self.action_space = self.env.action_space
        self.observation_space = self.env.observation_space
        self._done = True
        self.done = True
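The wrapper above only shows its constructor. A hypothetical sketch (not from the original source) of the reset/step forwarding such a wrapper usually adds, mirroring the attributes set in __init__; the class name and bookkeeping are assumptions:

class EngineWrapper:
    def __init__(self, env):
        self.env = env
        self.action_space = env.action_space
        self.observation_space = env.observation_space
        self.done = True

    def reset(self):
        # Start a new episode and clear the done flag
        self.done = False
        return self.env.reset()

    def step(self, action):
        # Forward to the wrapped Engine and track episode termination
        obs, reward, done, info = self.env.step(action)
        self.done = done
        return obs, reward, done, info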
Example #11
    def __init__(self,
                 domain,
                 task,
                 *args,
                 env=None,
                 normalize=True,
                 observation_keys=None,
                 unwrap_time_limit=True,
                 **kwargs):
        assert not args, (
            "Gym environments don't support args. Use kwargs instead.")

        self.normalize = normalize
        self.observation_keys = observation_keys
        self.unwrap_time_limit = unwrap_time_limit
        self.stacks = 1
        self.stacking_axis = 0

        self._Serializable__initialize(locals())
        super(GymAdapter, self).__init__(domain, task, *args, **kwargs)

        if env is None:
            assert (domain is not None and task is not None), (domain, task)
            env_id = f"{domain}-{task}"
            env = gym.envs.make(env_id, **kwargs)

            #env_id = f""
            #env = gym.make("Safexp-PointGoal1-v0")
            
        else:
            assert domain is None and task is None, (domain, task)
            env_id = 'custom'

        if isinstance(env, wrappers.TimeLimit) and unwrap_time_limit:
            # Remove the TimeLimit wrapper that sets 'done = True' when
            # the time limit specified for each environment has been passed and
            # therefore the environment is not Markovian (terminal condition
            # depends on time rather than state).
            env = env.env

        if isinstance(env.observation_space, spaces.Dict):
            observation_keys = (
                observation_keys or list(env.observation_space.spaces.keys()))
        if normalize:
            env = NormalizeActionWrapper(env)


        #### --- specifically for safety_gym wrapping --- ###
        if env_id in SAFETY_WRAPPER_IDS:
            dirname, _ = os.path.split(os.path.abspath(__file__))
            #### load config file
            with open(f'{dirname}/../gym/safety_gym/configs/{env_id}_config.json', 'r') as fp:
                config = json.load(fp)
            fp.close()
            # with open(f'{dirname}/../gym/safety_gym/add_configs/{env_id}_add_config.json', 'r') as fp:
            #     add_config = json.load(fp)
            # fp.close()


            env = Engine(config)
            # env = SAFETY_WRAPPER_IDS[env_id](env)

            #### additional config info like stacking etc.
            # for k in add_config.keys():
            #     self.safeconfig[k] = add_config[k]
                    
            #### dump config file to current data dir
            with open(f'{env_id}_config.json', 'w') as fp:
                json.dump(config, fp)
            fp.close()
            ####

            ### adding unserializable additional info after dumping (lol)
            # self.obs_indices = env.obs_indices
            # self.safeconfig['obs_indices'] = self.obs_indices

            ### stack env
            self.stacks = config.get('stacks', 1) ### for convenience
            self.stacking_axis = config.get('stacking_axis',0)
            if self.stacks>1:
                env = DummyVecEnv([lambda:env])
                #env = VecNormalize(env)        doesn't work at all for some reason
                env = VecFrameStack(env, self.stacks)

        #### --- end specifically for safety_gym  --- ###


        self._env = env
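The adapter above relies on a NormalizeActionWrapper, which presumably rescales agent actions in [-1, 1] into the env's native bounds. A generic sketch of that idea with a plain gym.ActionWrapper (this is not the imported NormalizeActionWrapper, just an illustration of the rescaling):

import gym
import numpy as np

class RescaleAction(gym.ActionWrapper):
    def __init__(self, env):
        super().__init__(env)
        self._low = env.action_space.low
        self._high = env.action_space.high
        # Agent-facing action space is the unit box
        self.action_space = gym.spaces.Box(low=-1.0, high=1.0,
                                           shape=env.action_space.shape,
                                           dtype=np.float32)

    def action(self, action):
        # Map [-1, 1] actions onto the wrapped env's true bounds
        action = np.clip(action, -1.0, 1.0)
        return self._low + (action + 1.0) * 0.5 * (self._high - self._low)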
Example #12
def main(robot, task, algo, seed, exp_name, cpu, hidden_dim=220, stacks=1):

    # Verify experiment
    robot_list = ['point', 'car', 'doggo']
    task_list = [
        'goal0', 'goal1', 'goal2', 'button1', 'button2', 'push1', 'push2'
    ]
    algo_list = ['ppo', 'ppo_lagrangian', 'trpo', 'trpo_lagrangian', 'cpo']

    algo = algo.lower()
    task = task.capitalize()
    robot = robot.capitalize()
    assert algo in algo_list, "Invalid algo"
    assert task.lower() in task_list, "Invalid task"
    assert robot.lower() in robot_list, "Invalid robot"

    # Hyperparameters
    exp_name = algo + '_' + robot + task
    if robot == 'Doggo':
        num_steps = 1e8
        steps_per_epoch = 60000
    else:
        num_steps = 3e7  #1e7
        steps_per_epoch = 100000  #def 30000

    stacks = stacks
    #     config = {
    #     'robot_base': 'xmls/point.xml',
    #     'task': 'push',
    #     'observation_flatten': True,
    #     'observe_goal_lidar': True,
    #     'observe_hazards' : True,
    #     'observe_vases': True,
    #     'lidar_num_bins': 16,
    #     'task': 'goal2',
    #     'goal_keepout': 0.305,
    #     'hazards_size': 0.2,
    #     'hazards_keepout': 0.18,
    #     'action_noise': 0.0,
    #     'observe_qpos': True,  # Observe the qpos of the world
    #     'observe_qvel': True,  # Observe the qvel of the robot
    #     'observe_ctrl': True,  # Observe the previous action
    # }

    env_name = 'Safexp-' + robot + task + '-v0'

    with open(f'{env_name}_config.json', 'r') as fp:
        config = json.load(fp)

    env = Engine(config)
    ### for writing config of env
    # env = gym.make(env_name)
    # config = env.config
    # with open(f'{env_name}_config.json', 'w') as fp:
    #     json.dump(config, fp)

    env = SafetyPreprocessedEnv(env)
    env = DummyVecEnv([lambda: env])
    env = VecFrameStack(env, n_stack=stacks)

    p_env = gym.make(env_name)
    #gym.utils.play.play(env, zoom=3)
    actions = [
        # [.5,.3],
        # [.5,-.5],
        # [.5,.7],
        # [-.5,-.3],
        # [-.5,.7],
        # [-1,1],
        # [-1,.3],
        # [-1,-.5],
        # [.5,-1],
        # [.5,-.7],
        [-0.02, .6],
        [-0.02, -.6],
        [0.02, .6],
        [0.02, -.6],
        [0, -.6],  # testing only for rotation
        [0, .6],  # testing only for rotation
        [0.01, 0.0],  # testing only for x acc
        [-0.01, 0.0],  # testing only for x acc
        # [1,.1],
        # [-.5,-1],
    ]

    clock = pygame.time.Clock()
    manual_control = False
    render = True
    step_on_key = True

    ##### model construction #####
    _model_train_freq = 10000
    _rollout_freq = 70000

    obs_dim = np.prod(env.observation_space.shape)
    act_dim = np.prod(env.action_space.shape)

    hidden_dim = hidden_dim
    #hidden_dim = 145 #280       ##115 for rotation only | 140 for x only | 140 for x and rot
    num_networks = 7
    num_elites = 5

    sensor_mse_weights = {
        # 'acc': 0.0000,
        # 'gyro': 0.000,
        # # 'goal_lidar':1,
        # # 'hazards_lidar':1,
        # # 'vases_lidar':1,
        # 'velo' : 1,
        #'action': 1, # this one doesn't matter (comes from action not obs)
    }

    mse_weights = np.ones(shape=(int(obs_dim / stacks) + 1), dtype='float32')
    mse_weights[-1] = 20  # more weight on reward
    mse_weights[-2] = 20  # more weight on reward
    #mse_weights[3:19]=10
    #mse_weights = mse_weights*100

    #### act_dim +=3 because of additional spike info and last action
    model = construct_model(
        obs_dim=obs_dim,
        act_dim=act_dim + 3,
        hidden_dim=hidden_dim,
        num_networks=num_networks,
        num_elites=num_elites,
        mse_weights=mse_weights,
        stacks=stacks,
    )
    pool = {
        'actions':
        np.zeros(shape=(_model_train_freq, act_dim + 3), dtype='float32'),
        'action_processed':
        np.zeros(shape=(_model_train_freq, 1), dtype='float32'),
        'next_observations':
        np.zeros(shape=(_model_train_freq, obs_dim), dtype='float32'),
        'observations':
        np.zeros(shape=(_model_train_freq, obs_dim), dtype='float32'),
        'rewards':
        np.zeros(shape=(_model_train_freq, 1), dtype='float32'),
        'terminals':
        np.zeros(shape=(_model_train_freq, 1), dtype='bool'),
        'sim_states':
        np.empty(shape=(_model_train_freq, ), dtype=object),
    }
    big_pool = {}

    ## fake env
    class StaticFns:
        @staticmethod
        def termination_fn(obs, act, next_obs):
            '''
            safeexp-pointgoal (like the other default safety-gym envs) doesn't terminate
            prematurely except due to sampling errors etc., so just return False for every state
            '''
            #assert len(obs.shape) == len(next_obs.shape) == len(act.shape) == 2

            done = np.array([False]).repeat(len(obs))
            done = done[:, None]
            return done

    static_fns = StaticFns  # termination functions for the envs (model can't simulate those)
    fake_env = FakeEnv(model, static_fns, task.lower(), stacks=stacks)
    perturbed_env = PerturbedEnv(p_env, std_inc=0)  #0.03)

    try:
        obs = env.reset()
        obs = np.squeeze(obs)

        action = np.array([0, 0])
        #init_sim_state = env.get_sim_state()
        #p_env.reset(init_sim_state)
        if render: rendered = env.render(mode='human')

        video_size = [100, 100]
        screen = pygame.display.set_mode(video_size)

        print("-----Safety Gym Environment is running-----")
        episode = 0
        episodes_successful = 0
        frame = 0
        total_timesteps = 0
        frames_per_episode = 1000
        ready_to_pool = False
        model_train_metrics = None
        rollouts = 0

        file_name = f'pointgoal2_stack{stacks}_noacc_nogyro_goaldist_nofilter'
        load_data = True
        write_data = False

        if load_data:
            infile = open(file_name, 'rb')
            big_pool = pickle.load(infile)
            infile.close()

            print('---------------')
            print(f'hidden: {hidden_dim}')
            model_train_metrics = _train_model(model,
                                               big_pool,
                                               stacks=stacks,
                                               batch_size=512,
                                               max_epochs=500,
                                               holdout_ratio=0.2)

        while True:
            action_old = action
            if manual_control:

                milliseconds = clock.get_time()
                for event in pygame.event.get():
                    keys = pygame.key.get_pressed()
                    if keys[K_UP]:
                        a = 1.0
                    elif keys[K_DOWN]:
                        a = -1.0
                    else:
                        a = 0

                    if keys[K_LEFT]:
                        s = 0.7
                    elif keys[K_RIGHT]:
                        s = -0.7
                    else:
                        s = 0.0

                action = np.array([
                    a,
                    s,
                ])
            else:
                if total_timesteps % 45 == 0:
                    action = np.array(actions[random.randint(
                        0,
                        len(actions) - 1)])

            #action = np.array([0.0, 0.2])
            #### process action for spike prediction
            processed_act = process_act(action[0], action_old[0])
            processed_act = np.concatenate(
                (action, action_old, [processed_act]))

            obs_old = obs
            obs, reward, done, info = env.step([action])
            obs = np.squeeze(obs)
            if model_train_metrics:
                if rollouts == 10:
                    print('restarting rollouts at 0')
                    rollouts = 0
                if rollouts == 0:
                    f_next_obs, f_rew, f_term, f_info = fake_env.step(
                        obs_old, processed_act, deterministic=True)
                else:
                    f_next_obs, f_rew, f_term, f_info = fake_env.step(
                        f_next_obs, processed_act, deterministic=True)
                print('------------------')
                print('predicted obs:')
                print('goal_l: ', f_next_obs[-55:-39])
                print('hazards_l: ', f_next_obs[-39:-23])
                print('vases_l: ', f_next_obs[-23:-7])
                print('magneto: ', f_next_obs[-7:-4])
                print('velo: ', f_next_obs[-4:-1])
                print('goal_dist: ', f_next_obs[-1:])
                print('rew: ', f_rew)
                print('------------------')
                print('real obs: ')
                print('goal_l: ', obs_unprocesseed['goal_lidar'])
                print('hazards_l: ', obs_unprocesseed['hazards_lidar'])
                print('vases_l: ', obs_unprocesseed['vases_lidar'])
                print('magneto_l: ', obs_unprocesseed['magnetometer'])
                print('velo: ', obs_unprocesseed['velocimeter'])
                print('goal_dist: ', obs_unprocesseed['goal_dist'])
                print('rew: ', reward)
                print('#######################################')

                rollouts += 1

            #sim_state = env.get_sim_state()
            if render: env.render()
            delta_obs = obs - obs_old

            ## store samples
            obs_old_s = obs_old
            obs_next_s = obs

            pool['actions'][total_timesteps] = processed_act
            pool['action_processed'][total_timesteps] = processed_act[4]
            pool['next_observations'][total_timesteps] = obs_next_s
            pool['observations'][total_timesteps] = obs_old_s
            pool['rewards'][total_timesteps] = reward
            pool['terminals'][total_timesteps] = done
            #pool['sim_states'][total_timesteps] = sim_state

            frame += 1
            total_timesteps += 1
            clock.tick(300)

            ##### train model #####
            if total_timesteps % _model_train_freq == 0:
                total_timesteps = 0

                if big_pool:
                    for key in pool:
                        big_pool[key] = np.concatenate(
                            (big_pool[key], pool[key]), axis=0)
                else:
                    for key in pool:
                        big_pool[key] = np.copy(pool[key])

                if write_data:
                    outfile = open(file_name, 'wb')
                    pickle.dump(big_pool, outfile)
                    outfile.close()
                    return

                #### train the model with input:(obs, act), outputs: (rew, delta_obs), inputs are divided into sets with holdout_ratio
                #model.reset()        #@anyboby debug
                print('---------------')
                print(f'hidden: {hidden_dim}')
                print(f'stacks: {stacks}')
                model_train_metrics = _train_model(model,
                                                   big_pool,
                                                   stacks=stacks,
                                                   batch_size=512,
                                                   max_epochs=500,
                                                   holdout_ratio=0.2)

            if total_timesteps % _rollout_freq == 0 and model_train_metrics and not done:
                for i in range(0, 5):
                    roll_start_obs = pool['next_observations'][
                        total_timesteps - 1, :]
                    #roll_start_sim = pool['sim_states'][total_timesteps-1]

                    error_check = np.sum(roll_start_obs - obs)

                    #cur_f = base_next_obs
                    # cur_f = perturbed_env.reset(sim_state=roll_start_sim)
                    # reset_check = np.sum(cur_f-roll_start_obs)
                    cur_r = obs
                    cur_f = cur_r

                    print(f'sum (roll_start_obs-obs: \n {error_check}')
                    # print(f'sum (cur_f - roll_start_obs): \n {reset_check}')

                    for i in range(0, 25):
                        rand_action = np.array(actions[random.randint(
                            0,
                            len(actions) - 1)])
                        if i % 3 == 0:
                            rand_action = np.array(actions[random.randint(
                                0,
                                len(actions) - 1)])

                        processed_act = process_act(rand_action[0],
                                                    action_old[0])
                        processed_act = np.concatenate(
                            (rand_action, action_old, [processed_act]))
                        f_next_obs, f_rew, f_term, f_info = fake_env.step(
                            cur_f, processed_act, deterministic=True)
                        #f_next_obs, f_rew, f_term, f_info = perturbed_env.step(rand_action)
                        r_next_obs, r_rew, r_term, r_info = env.step(
                            rand_action)

                        delta_f = f_next_obs - cur_f
                        delta_r = r_next_obs - cur_r
                        cur_f = f_next_obs
                        cur_r = r_next_obs

                        error = f_next_obs - r_next_obs
                        action_old = action

                        with np.printoptions(precision=3, suppress=True):
                            print(
                                "______________________________________________________"
                            )
                            print("action: ", action)

                            print(error[0:3])
                            print(delta_f[0:3])
                            print(delta_r[0:3])
                            print('---')
                            print(error[3:19])
                            print(delta_f[3:19])
                            print(delta_r[3:19])
                            print('---')
                            print(error[19:22])
                            print(delta_f[19:22])
                            print(delta_r[19:22])
                            print('---')
                            print(error[22:38])
                            print(delta_f[22:38])
                            print(delta_r[22:38])
                            print('---')
                            print(error[38:41])
                            print(delta_f[38:41])
                            print(delta_r[38:41])
                            print('---')
                            print(error[41:57])
                            print(delta_f[41:57])
                            print(delta_r[41:57])
                            print('---')
                            print(error[57:60])
                            print(delta_f[57:60])
                            print(delta_r[57:60])
                            print('---')

                            print(f_rew - r_rew)
                            print("reward: ", reward)
                            print(f'sum error obs diff: \n {np.sum(error)}')
                            print(info)

                        if f_term or r_term:
                            break

            if frame == frames_per_episode:
                print("Success! 1000 frames reached! ending episode")
                episodes_successful += 1
                done = True
                ready_to_pool = False
            if done:

                # plt.plot(np.arange(100), pool['observations'][0:100,55], color='blue')                         # filtered y-vel
                #plt.plot(np.arange(100), env.obs_replay_vy_filt[0:100], color='red' )
                # plt.plot(np.arange(100), pool['observations'][0:100, 1], color='purple' )

                # plt.plot(np.arange(100), pool['actions'][0:100, 0], color='blue' )
                # plt.plot(np.arange(100), pool['action_processed'][0:100, 0], color='red' )
                # plt.plot(np.arange(100), pool['observations'][0:100, 0], color='purple' )

                #plt.plot(np.arange(100), env.obs_replay_acc_y_filt[0:100], color='orange' )
                #plt.plot(np.arange(100), np.array(env.obs_replay_acc_y_real[0:100])/100, color='green' )

                #plt.plot(np.arange(100), env.obs_replay_acc_y_real[400:500], color='green' )

                plt.show()
                frame = 0
                episode += 1
                print("episode {} done".format(episode))
                #env.reset(init_sim_state)
                env.reset()

    finally:
        env.close()
        print("-----Safety Gym Environment is closed-----")
Example #13
 def test_timer(self):
     ''' Buttons should wait a period before becoming active again '''
     config = {
         'robot_base': 'xmls/point.xml',
         'num_steps': 100,
         'task': 'button',
         'buttons_num': 2,
         'buttons_locations': [(0, 0), (1, 0)],
         'buttons_resampling_delay': 1000,
         'constrain_buttons': True,
         'constrain_indicator': True,
         'robot_locations': [(-1, 0)],
         'robot_rot': 0,
         '_seed': 0,
     }
     # Correct button is pressed, nothing afterwards
     env = Engine(config)
     env.reset()
     info = self.rollout_env(env, gets_goal=True)
     self.assertEqual(info['cost_buttons'], 0.0)
     # Correct button is pressed, then times out and penalties
     config['buttons_resampling_delay'] = 10
     env = Engine(config)
     env.reset()
     info = self.rollout_env(env, gets_goal=True)
     self.assertEqual(info['cost_buttons'], 1.0)
     # Wrong button is pressed, gets penalty
     config['_seed'] = 1
     env = Engine(config)
     env.reset()
     info = self.rollout_env(env)
     self.assertEqual(info['cost_buttons'], 1.0)