Example #1
    def __init__(self, args, env, sess):
        self.firstIter = 1
        self.count = 1
        self.args = args
        self.sess = sess
        self.env = env
        self.torque_bound = 15.
        self.max_speed = 60.

        # Set up observation and action spaces
        self.observation_space = env.observation_space
        self.action_space = env.action_space
        print('Observation space', self.observation_space)
        print('Action space', self.action_space)

        # Determine the dimensions of the observation and action spaces
        self.observation_size = self.observation_space.shape[0]
        self.action_size = self.action_space.shape[0]

        # Build neural network model for observations/actions
        self.build_model()

        # Build barrier function model
        cbf.build_barrier(self)

        # Build GP model of dynamics
        dynamics_gp.build_GP_model(self)
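A minimal usage sketch for context: constructing an agent whose __init__ matches this example. The class name LEARNER is borrowed from Examples #2 and #5, the environment id is an assumption, and the model-building calls are reduced to reading the space dimensions so the snippet runs on its own.

import gym
import tensorflow as tf

class LEARNER(object):
    # Trimmed-down stand-in for the agent class above (hypothetical)
    def __init__(self, args, env, sess):
        self.args, self.env, self.sess = args, env, sess
        # Dimensions read from the environment, as in Example #1
        self.observation_size = env.observation_space.shape[0]
        self.action_size = env.action_space.shape[0]

args = {'env': 'Pendulum-v0'}  # key taken from the main() examples; the id is an assumption
env = gym.make(args['env'])
with tf.Session() as sess:
    agent = LEARNER(args, env, sess)
    print(agent.observation_size, agent.action_size)  # e.g. 3 1 for the pendulum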
Example #2
def main(args, reward_result):

    with tf.Session() as sess:

        env = gym.make(args['env'])
        np.random.seed(int(args['random_seed']))
        tf.set_random_seed(int(args['random_seed']))
        env.seed(int(args['random_seed']))

        # Set environment parameters for pendulum
        env.unwrapped.max_torque = 15.
        env.unwrapped.max_speed = 60.
        env.unwrapped.action_space = spaces.Box(low=-env.unwrapped.max_torque,
                                                high=env.unwrapped.max_torque,
                                                shape=(1, ))
        high = np.array([1., 1., env.unwrapped.max_speed])
        env.unwrapped.observation_space = spaces.Box(low=-high, high=high)

        state_dim = env.observation_space.shape[0]
        action_dim = env.action_space.shape[0]
        action_bound = env.action_space.high
        # Ensure the action bound is symmetric
        assert np.all(env.action_space.high == -env.action_space.low)

        actor = ActorNetwork(sess, state_dim, action_dim, action_bound,
                             float(args['actor_lr']), float(args['tau']),
                             int(args['minibatch_size']))

        critic = CriticNetwork(sess, state_dim, action_dim,
                               float(args['critic_lr']), float(args['tau']),
                               float(args['gamma']),
                               actor.get_num_trainable_vars())

        actor_noise = OrnsteinUhlenbeckActionNoise(mu=np.zeros(action_dim))

        agent = LEARNER(env)
        cbf.build_barrier(agent)
        dynamics_gp.build_GP_model(agent)
        agent.bar_comp = BARRIER(sess, 3, 1)

        [summary_ops, summary_vars,
         paths] = train(sess, env, args, actor, critic, actor_noise,
                        reward_result, agent)

        return [summary_ops, summary_vars, paths]
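As a usage sketch, main() can be driven with an argument dictionary like the one below. The keys mirror the args[...] lookups in the example (train() may consume additional keys); all values, the environment id, and the shape of reward_result are illustrative assumptions.

import numpy as np

args = {
    'env': 'Pendulum-v0',    # environment id (assumed)
    'random_seed': 1234,
    'actor_lr': 1e-4,        # actor learning rate
    'critic_lr': 1e-3,       # critic learning rate
    'tau': 1e-3,             # soft target-update rate
    'gamma': 0.99,           # discount factor
    'minibatch_size': 64,
}
reward_result = np.zeros(2500)  # per-episode reward buffer (shape assumed)
summary_ops, summary_vars, paths = main(args, reward_result)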
Example #3
    def __init__(self, args, env, sess):
        self.args = args
        self.sess = sess
        self.env = env
        self.firstIter = 1
        self.torque_bound = 100

        # Observation and action dimensions (hard-coded for this environment)
        self.observation_size = 15
        self.action_size = 1

        # Build neural network model for observations/actions
        self.build_model()

        # Build barrier function model
        cbf.build_barrier(self)

        # Build GP model
        dynamics_gp.build_GP_model(self)
Example #4
    def __init__(self, env, sess):
        self.firstIter = 1
        self.count = 1
        self.env = env
        self.torque_bound = 100.
        '''
        #Set up observation space and action space
        self.observation_space = env.observation_space
        self.action_space = env.action_space
        print('Observation space', self.observation_space)
        print('Action space', self.action_space)
        '''

        # Observation and action dimensions (hard-coded for this environment)
        self.observation_size = 15
        self.action_size = 1

        # Build barrier function model
        cbf.build_barrier(self)

        # Build GP model of dynamics
        dynamics_gp.build_GP_model(self)

        self.bar_comp = BARRIER(sess, 15, 1)  # compensator sized to the 15-dim observation, 1-dim action
Example #5
def main(args, reward_result, log_path):
    with tf.Session() as sess:
        env = gym.make(args['env'])
        np.random.seed(int(args['random_seed']))
        tf.set_random_seed(int(args['random_seed']))
        env.seed(int(args['random_seed']))

        # Create the log files
        if not os.path.isdir(log_path):
            os.mkdir(log_path)
        log_save_name = log_path + '/episode_performance.csv'
        with open(log_save_name, "w+") as f:
            f.write("episode number, steps in evaluation, accumulated reward, done\n")
        log_save_name_cbf = log_path + '/episode_cbf_performance.csv'
        with open(log_save_name_cbf, "w+") as f:
            f.write("episode number, steps in evaluation, accumulated reward, done\n")

        # Set environment parameters for pendulum
        env.unwrapped.max_torque = 15.
        env.unwrapped.max_speed = 60.
        env.unwrapped.action_space = spaces.Box(low=-env.unwrapped.max_torque,
                                                high=env.unwrapped.max_torque,
                                                shape=(1, ))
        high = np.array([1., 1., env.unwrapped.max_speed])
        env.unwrapped.observation_space = spaces.Box(low=-high, high=high)

        state_dim = env.observation_space.shape[0]
        action_dim = env.action_space.shape[0]
        action_bound = env.action_space.high
        # Ensure the action bound is symmetric
        assert np.all(env.action_space.high == -env.action_space.low)

        actor = ActorNetwork(sess, state_dim, action_dim, action_bound,
                             float(args['actor_lr']), float(args['tau']),
                             int(args['minibatch_size']))

        critic = CriticNetwork(sess, state_dim, action_dim,
                               float(args['critic_lr']), float(args['tau']),
                               float(args['gamma']),
                               actor.get_num_trainable_vars())

        actor_noise = OrnsteinUhlenbeckActionNoise(mu=np.zeros(action_dim))

        agent = LEARNER(env)
        cbf.build_barrier(agent)
        dynamics_gp.build_GP_model(agent)
        agent.bar_comp = BARRIER(sess, 3, 1)

        [summary_ops, summary_vars,
         paths] = train(sess, env, args, actor, critic, actor_noise,
                        reward_result, agent, log_save_name, log_save_name_cbf)

        # Evaluate the final model 100 times to get a better estimate of its performance
        episode_length = int(args['max_episode_len'])
        with open(args['log_path'] + '/final_eval.csv', "w+") as f:
            f.write("reward, steps, done, safe\n")
            for k in range(100):
                steps, reward, done, safe = evaluate(env, actor, episode_length)
                f.write(str(reward) + ', ' + str(steps) + ', ' +
                        str(done) + ', ' + str(safe) + '\n')

        # Evaluate the final model with the CBF filter 100 times as well
        with open(args['log_path'] + '/final_cbf_eval.csv', "w+") as f:
            f.write("reward, steps, done, safe\n")
            for k in range(100):
                steps, reward, done, safe = evaluate_with_cbf(
                    env, actor, agent, episode_length)
                f.write(str(reward) + ', ' + str(steps) + ', ' +
                        str(done) + ', ' + str(safe) + '\n')

        return [summary_ops, summary_vars, paths]
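Example #5 assumes that evaluate(env, actor, episode_length) and evaluate_with_cbf(...) each return a (steps, reward, done, safe) tuple. A minimal sketch of a compatible evaluate() follows; the actor.predict call signature and the safety criterion (the pendulum's angular velocity staying within the max_speed bound set above) are assumptions, not the authors' implementation.

import numpy as np

def evaluate(env, actor, episode_length):
    # Roll out the deterministic policy for one episode and report
    # (steps, accumulated reward, done flag, safety flag).
    s = env.reset()
    total_reward = 0.0
    done, safe = False, True
    steps = 0
    for steps in range(1, episode_length + 1):
        a = actor.predict(np.reshape(s, (1, -1)))  # batched state (assumed signature)
        s, r, done, _ = env.step(a[0])
        total_reward += r
        # Safety criterion (assumed): angular velocity stays within the bound
        if abs(s[2]) >= env.unwrapped.max_speed:
            safe = False
        if done:
            break
    return steps, total_reward, done, safe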