Example #1
    def __init__(self):
        self.max_speed = 1000.
        self.max_torque = 2.
        #self.dt = .05
        #self.viewer = None
        
        # PyBullet-backed single pendulum; SINCOS = 1 exposes sin/cos of the
        # joint angle via states_sincos, which the observations are built from
        self.robot = Robot("single_pendulum.urdf")
        self.robot.time_step = .01
        self.robot.GUI_ENABLED = 0
        self.robot.setupSim()
        self.robot.SINCOS = 1
        self.reward_weights = [1., 0.0, 0.0]

        self.step_count = 0
        self.episode_duration = tc.NSTEPS

        # Observation bounds: sin/cos components in [-1, 1], joint velocity in [-max_speed, max_speed]
        high = np.array([1., 1., self.max_speed], dtype=np.float32)
        self.action_space = spaces.Box(
            low=-self.max_torque,
            high=self.max_torque, shape=(1,),
            dtype=np.float32
        )
        self.observation_space = spaces.Box(
            low=-high,
            high=high,
            dtype=np.float32
        )
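
# Hedged usage sketch (not part of the original code): assuming the full
# PendulumPyB class of Example #8 (with step/reset) is importable, the spaces
# defined above support a standard Gym-style rollout with random torques.
# The name `env_demo` is illustrative only.
env_demo = PendulumPyB()
obs = env_demo.reset()
for _ in range(10):
    u = env_demo.action_space.sample()           # torque in [-max_torque, max_torque]
    obs, reward, done, info = env_demo.step(u)   # obs = sin/cos of angle + joint velocity
    if done:
        obs = env_demo.reset()
env_demo.robot.stopSim()
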
Example #2
QVALUE_LEARNING_RATE = tc.QVALUE_LEARNING_RATE  # Base learning rate for the Q-value Network
POLICY_LEARNING_RATE = tc.POLICY_LEARNING_RATE  # Base learning rate for the policy network
DECAY_RATE = tc.DECAY_RATE  # Discount factor
UPDATE_RATE = tc.UPDATE_RATE  # Homotopy rate to update the networks
REPLAY_SIZE = tc.REPLAY_SIZE  # Size of replay buffer
BATCH_SIZE = tc.BATCH_SIZE  # Number of points to be fed in stochastic gradient
NH1 = tc.NH1
NH2 = tc.NH2  # Hidden layer size
range_esp = tc.range_esp
time_step = tc.time_step
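
# Illustrative sketch (an assumption, not from the original code): the
# "homotopy rate" UPDATE_RATE is interpreted here as a Polyak-style soft
# update, where target-network weights slowly track the online weights.
def soft_update(target_weights, source_weights, rate=UPDATE_RATE):
    # New target = (1 - rate) * old target + rate * online weights
    return [(1.0 - rate) * t + rate * s
            for t, s in zip(target_weights, source_weights)]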

reward_weights = [1., 0.0, 0.00]

sim_number = 2
RANDSET = 0
env = Robot("single_pendulum.urdf")
env_rend = Robot("single_pendulum.urdf", sim_number=sim_number)  #for rendering

env.RANDSET = 0

step_expl = 0.
epi_expl = 0.

NX = 3  # State is dim-3 (sin/cos of the joint angle plus joint velocity); training on (q, q_dot) alone converges but needs about twice as many neurons
NU = 1  # Control is dim-1: joint torque


def angle_normalize(x):
    # Absolute angular distance of x from 0, wrapped into [0, pi]
    return min(x % (2 * np.pi), abs(x % (2 * np.pi) - 2 * np.pi))
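
# Quick illustrative check (not in the original): angle_normalize returns the
# absolute angular distance from 0, wrapped into [0, pi].
assert np.isclose(angle_normalize(0.1), 0.1)
assert np.isclose(angle_normalize(-0.1), 0.1)
assert np.isclose(angle_normalize(np.pi + 0.5), np.pi - 0.5)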

Example #3
NSTEPS = tc.NSTEPS  # Max episode length
QVALUE_LEARNING_RATE = tc.QVALUE_LEARNING_RATE  # Base learning rate for the Q-value Network
POLICY_LEARNING_RATE = tc.POLICY_LEARNING_RATE  # Base learning rate for the policy network
DECAY_RATE = tc.DECAY_RATE  # Discount factor
UPDATE_RATE = tc.UPDATE_RATE  # Homotopy rate to update the networks
REPLAY_SIZE = tc.REPLAY_SIZE  # Size of replay buffer
BATCH_SIZE = tc.BATCH_SIZE  # Number of points to be fed in stochastic gradient
NH1 = NH2 = tc.NH1  # Hidden layer sizes (both layers use tc.NH1 here)
range_esp = tc.range_esp
time_step = tc.time_step

SIM_NUMBER = 1.2

model = DDPG.load("ddpg_pendulum_stb_baselines_" + str(SIM_NUMBER))

robot = Robot("single_pendulum.urdf")
robot.sim_number = 1
robot.RANDSET = 0
robot.GUI_ENABLED = 1
robot.SINCOS = 1
path_log = "/home/pasquale/Desktop/thesis/thesis-code/1D_pendulum/stable_baselines/"
robot.time_step = time_step
robot.setupSim()

#Evaluate policy
#env.robot.stopSim()
#env = PendulumPyB()

# Check convergence
# compare
# convergence
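
# Hedged visual-evaluation sketch (an assumption, mirroring the rollout loop of
# Example #4): build the observation from states_sincos/states_dot, query the
# loaded DDPG policy, and step the simulator. `import time` is assumed.
for _ in range(NSTEPS):
    obs = np.array([robot.states_sincos[1][0],
                    robot.states_sincos[1][1],
                    robot.states_dot[1][3]])
    action, _ = model.predict(obs)
    robot.simulateDyn(action.tolist())
    time.sleep(time_step)
robot.stopSim()
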
Example #4
end_time = time.time()
elapsed_time = end_time - start_time
model.save("ddpg_pendulum_stb_baselines_" + str(SIM_NUMBER))
print('elapsed ' + str(elapsed_time) + 's')
mean_reward, std_reward = evaluate_policy(model, env, n_eval_episodes=21)
env.robot.stopSim()


#mean_reward, std_reward = evaluate_policy(model, env_eval, n_eval_episodes=10)
#print(f'Mean reward: {mean_reward} +/- {std_reward:.2f}')

  
### save video
# model = DDPG.load("ddpg_pendulum_stb_baselines")

robot = Robot("single_pendulum.urdf")
robot.sim_number = SIM_NUMBER
robot.RANDSET = 0
robot.LOGDATA = 1
robot.SINCOS = 1
robot.video_path = "/home/pasquale/Desktop/thesis/thesis-code/1D_pendulum/stable_baselines/Video"
path_log = "/home/pasquale/Desktop/thesis/thesis-code/1D_pendulum/stable_baselines/"
robot.time_step = time_step
robot.setupSim()

for i in range(NSTEPS):
    # Observation: sin/cos of the joint angle plus the joint velocity
    obs = np.array([robot.states_sincos[1][0],
                    robot.states_sincos[1][1],
                    robot.states_dot[1][3]])
    action, _states = model.predict(obs)
    robot.simulateDyn(action.tolist())
    time.sleep(0.05)
Example #5
#           second revolute [0.023,0.,0.1]
#               center of gravity of link 2 [-0.0050107, 1.9371E-10, 0.10088]
#
# Since the rotation is about x and all joint reference frames (and the link-2
# CoG frame) share the same orientation, only y and z matter for the length
# (actually only z, since y is always 0).
#
# The tip of the Acrobot is taken to be the center of gravity of link 2
# -> goal position

length = 0.035 + 0.01 + 0.1088
goal = np.array([0, 0, length])
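
# Illustrative helper (an assumption, not in the original code): a
# distance-to-goal term that a reward could use, with `tip_position` a
# hypothetical placeholder for however the simulator exposes the link-2 CoG.
def distance_to_goal(tip_position):
    return np.linalg.norm(np.asarray(tip_position) - goal)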





env = Robot("double_pendulum.urdf")
env_rend = Robot("double_pendulum.urdf", sim_number=SIM_NUMBER)  # for rendering

env.GUI_ENABLED = 0
env.RANDSET = 0
env.SINCOS = 1
env.time_step = time_step
env.actuated_index = [2]  # only the second joint is actuated (acrobot)
env.max_torque = 5.0

env_rend.SINCOS = 1
env_rend.GUI_ENABLED = 1
env_rend.actuated_index = [2]
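
# Minimal interaction sketch (an assumption): with actuated_index = [2] only
# the second joint receives torque, so simulateDyn is fed a single value.
env.setupSim()
for _ in range(10):
    tau = np.random.uniform(-env.max_torque, env.max_torque)
    env.simulateDyn([tau])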

Example #6
UPDATE_RATE             = tc.UPDATE_RATE                # Homotopy rate to update the networks
REPLAY_SIZE             = tc.REPLAY_SIZE                # Size of replay buffer
BATCH_SIZE              = tc.BATCH_SIZE                 # Number of points to be fed in stochastic gradient
NH1                     = tc.NH1
NH2                     = tc.NH2                        # Hidden layer size
range_esp               = tc.range_esp
time_step               = tc.time_step

reward_weights = [1., 0.0, 0.00]

SIM_NUMBER = 0
RANDSET = 0
env = Robot("single_pendulum.urdf")
env_rend = Robot("single_pendulum.urdf", sim_number=SIM_NUMBER)  # for rendering

step_expl = 0.
epi_expl = 0.

NX                  = 3          # State is dim-3 (sin/cos of the joint angle plus joint velocity); training on (q, q_dot) alone converges but needs about twice as many neurons
NU                  = 1          # Control is dim-1: joint torque

def angle_normalize(x):
    # Absolute angular distance of x from 0, wrapped into [0, pi]
    return min(x % (2 * np.pi), abs(x % (2 * np.pi) - 2 * np.pi))

Example #7
NSTEPS = tc.NSTEPS  # Max episode length
QVALUE_LEARNING_RATE = tc.QVALUE_LEARNING_RATE  # Base learning rate for the Q-value Network
POLICY_LEARNING_RATE = tc.POLICY_LEARNING_RATE  # Base learning rate for the policy network
DECAY_RATE = tc.DECAY_RATE  # Discount factor
UPDATE_RATE = tc.UPDATE_RATE  # Homotopy rate to update the networks
REPLAY_SIZE = tc.REPLAY_SIZE  # Size of replay buffer
BATCH_SIZE = tc.BATCH_SIZE  # Number of points to be fed in stochastic gradient
NH1 = tc.NH1
NH2 = tc.NH2  # Hidden layer size
range_esp = tc.range_esp

reward_weights = [1., 0.0, 0.00]

sim_number = 12
RANDSET = 0
env = Robot("single_pendulum.urdf")
env_rend = Robot("single_pendulum.urdf", sim_number=sim_number)  #for rendering

env.RANDSET = 0

step_expl = 0.
epi_expl = 0.

NX = 3  # State is dim-3 (sin/cos of the joint angle plus joint velocity); training on (q, q_dot) alone converges but needs about twice as many neurons
NU = 1  # Control is dim-1: joint torque


def angle_normalize(x):
    return min(x % (2 * np.pi), abs(x % (2 * np.pi) - 2 * np.pi))

Example #8
class PendulumPyB(gym.Env):

    metadata = {'render.modes': ['human']}

    def __init__(self):
        self.max_speed = 1000.
        self.max_torque = 2.
        #self.dt = .05
        #self.viewer = None

        self.robot = Robot("single_pendulum.urdf")
        self.robot.time_step = .01
        self.robot.GUI_ENABLED = 0
        self.robot.setupSim()
        self.robot.SINCOS = 1
        self.reward_weights = [1., 0.0, 0.0]

        self.step_count = 0
        self.episode_duration = tc.NSTEPS

        high = np.array([1., 1., self.max_speed], dtype=np.float32)
        self.action_space = spaces.Box(low=-self.max_torque,
                                       high=self.max_torque,
                                       shape=(1, ),
                                       dtype=np.float32)
        self.observation_space = spaces.Box(low=-high,
                                            high=high,
                                            dtype=np.float32)

    def step(self, u):

        #dt = self.dt
        self.step_count += 1
        end_episode = False
        #u = np.clip(u, -self.max_torque, self.max_torque)[0]
        # u is a length-1 array of joint torques
        self.robot.simulateDyn([u[0]])
        # Weighted penalties on angle error, joint velocity and control effort
        angle_err = angle_normalize(self.robot.states[1][3])
        q_dot = self.robot.states_dot[1][3]
        reward = (-self.reward_weights[0] * angle_err**2
                  - self.reward_weights[1] * q_dot**2
                  - self.reward_weights[2] * u**2)

        # End the episode every `episode_duration` steps
        if self.step_count % self.episode_duration == 0:
            end_episode = True
        return self._get_obs(), reward, end_episode, {}

    def reset(self):
        self.robot.resetRobot()
        return self._get_obs()

    def _get_obs(self):
        # Observation: sin/cos of the joint angle and the joint velocity
        self.robot.observeState()
        return np.array([
            self.robot.states_sincos[1][0], self.robot.states_sincos[1][1],
            self.robot.states_dot[1][3]
        ])

    """ def render(self, mode='human'):
        if self.viewer is None:
            from gym.envs.classic_control import rendering
            self.viewer = rendering.Viewer(500, 500)
            self.viewer.set_bounds(-2.2, 2.2, -2.2, 2.2)
            rod = rendering.make_capsule(1, .2)
            rod.set_color(.8, .3, .3)
            self.pole_transform = rendering.Transform()
            rod.add_attr(self.pole_transform)
            self.viewer.add_geom(rod)
            axle = rendering.make_circle(.05)
            axle.set_color(0, 0, 0)
            self.viewer.add_geom(axle)
            fname = path.join(path.dirname(__file__), "assets/clockwise.png")
            self.img = rendering.Image(fname, 1., 1.)
            self.imgtrans = rendering.Transform()
            self.img.add_attr(self.imgtrans)

        self.viewer.add_onetime(self.img)
        self.pole_transform.set_rotation(self.state[0] + np.pi / 2)
        if self.last_u:
            self.imgtrans.scale = (-self.last_u / 2, np.abs(self.last_u) / 2)

        return self.viewer.render(return_rgb_array=mode == 'rgb_array') """
    """ def close(self):