def __init__(self, args, env, sess):
    self.firstIter = 1
    self.count = 1
    self.args = args
    self.sess = sess
    self.env = env
    self.torque_bound = 15.
    self.max_speed = 60.

    # Set up observation space and action space
    self.observation_space = env.observation_space
    self.action_space = env.action_space
    print('Observation space', self.observation_space)
    print('Action space', self.action_space)

    # Determine dimensions of observation & action space
    self.observation_size = self.env.observation_space.shape[0]
    self.action_size = self.action_space.shape[0]

    # Build neural network model for observations/actions
    self.build_model()
    # Build barrier function model
    cbf.build_barrier(self)
    # Build GP model of dynamics
    dynamics_gp.build_GP_model(self)
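# --- Illustrative sketch, not part of this repo --------------------------
# cbf.build_barrier(self) is called above but defined elsewhere. The sketch
# below shows the generic control-barrier-function idea it builds on: given
# the policy's proposed action u_rl, solve a small QP for the nearest action
# that keeps a barrier h(s) >= 0 under control-affine dynamics. The Lie
# derivative terms dh_f and dh_g, the gain gamma, and the bound handling are
# assumptions for illustration; the repo's actual QP presumably also folds
# in the GP disturbance estimate built by dynamics_gp.
import numpy as np
from cvxopt import matrix, solvers

def cbf_filter(u_rl, h_val, dh_f, dh_g, gamma, torque_bound):
    # minimize 0.5 * (u - u_rl)^2, i.e. stay as close as possible to u_rl
    P = matrix(np.eye(1))
    q = matrix(-np.array([u_rl]))
    # CBF condition dh_f + dh_g*u + gamma*h >= 0, rewritten as
    # -dh_g*u <= dh_f + gamma*h, plus the torque bounds |u| <= torque_bound
    G = matrix(np.array([[-dh_g], [1.], [-1.]]))
    hvec = matrix(np.array([dh_f + gamma * h_val, torque_bound, torque_bound]))
    solvers.options['show_progress'] = False
    sol = solvers.qp(P, q, G, hvec)
    return np.array(sol['x']).flatten()[0]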
def main(args, reward_result):
    with tf.Session() as sess:
        env = gym.make(args['env'])
        np.random.seed(int(args['random_seed']))
        tf.set_random_seed(int(args['random_seed']))
        env.seed(int(args['random_seed']))

        # Set environment parameters for the pendulum
        env.unwrapped.max_torque = 15.
        env.unwrapped.max_speed = 60.
        env.unwrapped.action_space = spaces.Box(low=-env.unwrapped.max_torque,
                                                high=env.unwrapped.max_torque,
                                                shape=(1,))
        high = np.array([1., 1., env.unwrapped.max_speed])
        env.unwrapped.observation_space = spaces.Box(low=-high, high=high)

        state_dim = env.observation_space.shape[0]
        action_dim = env.action_space.shape[0]
        action_bound = env.action_space.high
        # Ensure the action bound is symmetric
        assert (env.action_space.high == -env.action_space.low)

        actor = ActorNetwork(sess, state_dim, action_dim, action_bound,
                             float(args['actor_lr']), float(args['tau']),
                             int(args['minibatch_size']))
        critic = CriticNetwork(sess, state_dim, action_dim,
                               float(args['critic_lr']), float(args['tau']),
                               float(args['gamma']),
                               actor.get_num_trainable_vars())
        actor_noise = OrnsteinUhlenbeckActionNoise(mu=np.zeros(action_dim))

        agent = LEARNER(env)
        cbf.build_barrier(agent)
        dynamics_gp.build_GP_model(agent)
        agent.bar_comp = BARRIER(sess, 3, 1)

        [summary_ops, summary_vars, paths] = train(sess, env, args, actor,
                                                   critic, actor_noise,
                                                   reward_result, agent)

        return [summary_ops, summary_vars, paths]
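# --- Illustrative sketch, not part of this repo --------------------------
# OrnsteinUhlenbeckActionNoise(mu=np.zeros(action_dim)) above follows the
# exploration-noise pattern standard in DDPG implementations. A minimal
# version is sketched below; the theta/sigma/dt defaults are the values
# commonly used in open-source DDPG code, not ones confirmed by this repo.
import numpy as np

class OrnsteinUhlenbeckActionNoise:
    def __init__(self, mu, sigma=0.3, theta=0.15, dt=1e-2, x0=None):
        self.mu, self.sigma, self.theta, self.dt = mu, sigma, theta, dt
        self.x0 = x0
        self.reset()

    def __call__(self):
        # Discretized OU process:
        # x_{t+1} = x_t + theta*(mu - x_t)*dt + sigma*sqrt(dt)*N(0, I)
        self.x_prev = (self.x_prev
                       + self.theta * (self.mu - self.x_prev) * self.dt
                       + self.sigma * np.sqrt(self.dt)
                       * np.random.normal(size=self.mu.shape))
        return self.x_prev

    def reset(self):
        # Restart the process from x0 (or the origin) at episode boundaries
        self.x_prev = self.x0 if self.x0 is not None else np.zeros_like(self.mu)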
def __init__(self, args, env, sess):
    self.args = args
    self.sess = sess
    self.env = env
    self.firstIter = 1
    self.torque_bound = 100

    # Determine dimensions of observation & action space
    self.observation_size = 15
    self.action_size = 1

    # Build neural network model for observations/actions
    self.build_model()
    # Build barrier function model
    cbf.build_barrier(self)
    # Build GP model of dynamics
    dynamics_gp.build_GP_model(self)
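# --- Illustrative sketch, not part of this repo --------------------------
# dynamics_gp.build_GP_model(self) is referenced above but defined
# elsewhere. A common way to realize such a model is one Gaussian-process
# regressor per state dimension, trained on the residual between observed
# and nominal next states so the GP captures model error. The kernel choice
# and regression target below are illustrative assumptions (scikit-learn is
# used for brevity; the repo's module may use a different GP library).
import numpy as np
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, ConstantKernel

def build_gp_models(state_dim):
    kernel = ConstantKernel(1.0) * RBF(length_scale=1.0)
    return [GaussianProcessRegressor(kernel=kernel, alpha=1e-4)
            for _ in range(state_dim)]

def fit_gp_models(gps, states, actions, residuals):
    X = np.hstack([states, actions])           # inputs: (s, a) pairs
    for i, gp in enumerate(gps):
        gp.fit(X, residuals[:, i])             # one output dimension per GP

def predict_disturbance(gps, state, action):
    x = np.hstack([state, action]).reshape(1, -1)
    out = [gp.predict(x, return_std=True) for gp in gps]
    mean = np.array([m[0] for m, _ in out])
    std = np.array([s[0] for _, s in out])     # usable for confidence bounds
    return mean, std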
def __init__(self, env, sess):
    self.firstIter = 1
    self.count = 1
    self.env = env
    self.torque_bound = 100.
    '''
    # Set up observation space and action space
    self.observation_space = env.observation_space
    self.action_space = env.action_space
    print('Observation space', self.observation_space)
    print('Action space', self.action_space)
    '''
    # Determine dimensions of observation & action space
    self.observation_size = 15
    self.action_size = 1

    # Build barrier function model
    cbf.build_barrier(self)
    # Build GP model of dynamics
    dynamics_gp.build_GP_model(self)
    self.bar_comp = BARRIER(sess, 15, 1)
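# --- Illustrative sketch, not part of this repo --------------------------
# BARRIER(sess, 15, 1) above suggests a small supervised network mapping a
# 15-dimensional observation to a 1-dimensional compensating action. One
# plausible TF1-style realization is sketched below; the layer size,
# activation, and learning rate are assumptions, not the repo's actual
# architecture, and variables must still be initialized via
# sess.run(tf.global_variables_initializer()) before use.
import tensorflow as tf

class BARRIER:
    def __init__(self, sess, input_dim, output_dim, lr=1e-3):
        self.sess = sess
        self.obs = tf.placeholder(tf.float32, [None, input_dim])
        self.target = tf.placeholder(tf.float32, [None, output_dim])
        h = tf.layers.dense(self.obs, 64, activation=tf.nn.relu)
        self.out = tf.layers.dense(h, output_dim)
        # Regress the network output onto the observed CBF compensation
        self.loss = tf.losses.mean_squared_error(self.target, self.out)
        self.train_op = tf.train.AdamOptimizer(lr).minimize(self.loss)

    def get_action(self, obs):
        return self.sess.run(self.out, {self.obs: obs})

    def train(self, obs, target):
        return self.sess.run([self.loss, self.train_op],
                             {self.obs: obs, self.target: target})[0]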
def main(args, reward_result, log_path):
    with tf.Session() as sess:
        env = gym.make(args['env'])
        np.random.seed(int(args['random_seed']))
        tf.set_random_seed(int(args['random_seed']))
        env.seed(int(args['random_seed']))

        # Create the log files
        if not os.path.isdir(log_path):
            os.mkdir(log_path)
        log_save_name = log_path + '/episode_performance.csv'
        f = open(log_save_name, "w+")
        f.write("episode number, steps in evaluation, accumulated reward, done \n")
        f.close()
        log_save_name_cbf = log_path + '/episode_cbf_performance.csv'
        f = open(log_save_name_cbf, "w+")
        f.write("episode number, steps in evaluation, accumulated reward, done \n")
        f.close()

        # Set environment parameters for the pendulum
        env.unwrapped.max_torque = 15.
        env.unwrapped.max_speed = 60.
        env.unwrapped.action_space = spaces.Box(low=-env.unwrapped.max_torque,
                                                high=env.unwrapped.max_torque,
                                                shape=(1,))
        high = np.array([1., 1., env.unwrapped.max_speed])
        env.unwrapped.observation_space = spaces.Box(low=-high, high=high)

        state_dim = env.observation_space.shape[0]
        action_dim = env.action_space.shape[0]
        action_bound = env.action_space.high
        # Ensure the action bound is symmetric
        assert (env.action_space.high == -env.action_space.low)

        actor = ActorNetwork(sess, state_dim, action_dim, action_bound,
                             float(args['actor_lr']), float(args['tau']),
                             int(args['minibatch_size']))
        critic = CriticNetwork(sess, state_dim, action_dim,
                               float(args['critic_lr']), float(args['tau']),
                               float(args['gamma']),
                               actor.get_num_trainable_vars())
        actor_noise = OrnsteinUhlenbeckActionNoise(mu=np.zeros(action_dim))

        agent = LEARNER(env)
        cbf.build_barrier(agent)
        dynamics_gp.build_GP_model(agent)
        agent.bar_comp = BARRIER(sess, 3, 1)

        [summary_ops, summary_vars, paths] = train(sess, env, args, actor,
                                                   critic, actor_noise,
                                                   reward_result, agent,
                                                   log_save_name,
                                                   log_save_name_cbf)

        # Evaluate the final model 100 times to get a better estimate of its performance
        f = open(args['log_path'] + '/final_eval.csv', "w+")
        f.write("reward, steps, done, safe\n")
        episode_length = int(args['max_episode_len'])
        for k in range(100):
            steps, reward, done, safe = evaluate(env, actor, episode_length)
            f.write(str(reward) + ', ' + str(steps) + ', ' + str(done) + ', ' +
                    str(safe) + '\n')
        f.close()

        # Repeat the 100-episode evaluation with the CBF safety filter active
        f = open(args['log_path'] + '/final_cbf_eval.csv', "w+")
        f.write("reward, steps, done, safe\n")
        episode_length = int(args['max_episode_len'])
        for k in range(100):
            steps, reward, done, safe = evaluate_with_cbf(env, actor, agent,
                                                          episode_length)
            f.write(str(reward) + ', ' + str(steps) + ', ' + str(done) + ', ' +
                    str(safe) + '\n')
        f.close()

        return [summary_ops, summary_vars, paths]
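# --- Illustrative sketch, not part of this repo --------------------------
# evaluate(env, actor, episode_length) above returns (steps, reward, done,
# safe) but is defined elsewhere. A hedged reconstruction: roll out the
# deterministic policy for one episode and record the episode length, the
# return, and whether the state ever left an assumed safe region
# |theta| <= 1 rad. The actor.predict/actor.s_dim interface and the safety
# check are assumptions based on common DDPG code, not confirmed details.
import numpy as np

def evaluate(env, actor, episode_length):
    s = env.reset()
    total_reward, steps, done, safe = 0., 0, False, True
    for steps in range(1, episode_length + 1):
        # Deterministic policy action, no exploration noise during evaluation
        a = actor.predict(np.reshape(s, (1, actor.s_dim)))
        s, r, done, _ = env.step(a[0])
        total_reward += r
        # Recover theta from the (cos, sin, omega) pendulum observation
        if abs(np.arctan2(s[1], s[0])) > 1.0:   # assumed safe region
            safe = False
        if done:
            break
    return steps, total_reward, done, safe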