Code example #1
import gym
import numpy as np
from gym import spaces

# `Robot` and the configuration module `tc` are project-specific and come from
# the thesis code base (their imports are not shown in this excerpt).


class PendulumPyB(gym.Env):  # name taken from the `env = PendulumPyB()` reference below; gym.Env base assumed
    def __init__(self):
        self.max_speed = 1000.
        self.max_torque = 2.
        #self.dt = .05
        #self.viewer = None

        # PyBullet-backed single pendulum
        self.robot = Robot("single_pendulum.urdf")
        self.robot.time_step = .01
        self.robot.GUI_ENABLED = 0
        self.robot.setupSim()
        self.robot.SINCOS = 1  # expose the joint angle as (cos q, sin q)
        self.reward_weights = [1., 0.0, 0.0]

        self.step_count = 0
        self.episode_duration = tc.NSTEPS

        # Observation: [cos(theta), sin(theta), theta_dot]; action: joint torque
        high = np.array([1., 1., self.max_speed], dtype=np.float32)
        self.action_space = spaces.Box(
            low=-self.max_torque,
            high=self.max_torque, shape=(1,),
            dtype=np.float32
        )
        self.observation_space = spaces.Box(
            low=-high,
            high=high,
            dtype=np.float32
        )
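
A minimal usage sketch for the environment above, assuming it is the `PendulumPyB` class referenced later in this section; only the Gym space API is exercised here.

env = PendulumPyB()
a = env.action_space.sample()        # torque in [-max_torque, max_torque], shape (1,)
o = env.observation_space.sample()   # [cos(theta), sin(theta), theta_dot]
assert env.action_space.contains(a)
assert env.observation_space.contains(o)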
Code example #2
QVALUE_LEARNING_RATE = tc.QVALUE_LEARNING_RATE  # Base learning rate for the Q-value network
POLICY_LEARNING_RATE = tc.POLICY_LEARNING_RATE  # Base learning rate for the policy network
DECAY_RATE = tc.DECAY_RATE  # Discount factor
UPDATE_RATE = tc.UPDATE_RATE  # Soft ("homotopy") rate for the target-network updates
REPLAY_SIZE = tc.REPLAY_SIZE  # Size of the replay buffer
BATCH_SIZE = tc.BATCH_SIZE  # Minibatch size for each stochastic gradient step
NH1 = tc.NH1  # Hidden layer 1 size
NH2 = tc.NH2  # Hidden layer 2 size
range_esp = tc.range_esp
time_step = tc.time_step

reward_weights = [1., 0.0, 0.00]

sim_number = 2
RANDSET = 0
env = Robot("single_pendulum.urdf")
env_rend = Robot("single_pendulum.urdf", sim_number=sim_number)  # for rendering

env.RANDSET = 0

step_expl = 0.
epi_expl = 0.

NX = 3  # State dimension (cos q, sin q, q_dot); training converges with (q, q_dot) using 2x more neurons.
NU = 1  # Control is dim-1: joint torque


def angle_normalize(x):
    # Absolute angular distance of x from the nearest multiple of 2*pi, in [0, pi].
    return min(x % (2 * np.pi), abs(x % (2 * np.pi) - 2 * np.pi))
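
A quick sanity check of what `angle_normalize` returns (not part of the training code):

import numpy as np

print(angle_normalize(0.1))              # 0.1  (already close to 0)
print(angle_normalize(-0.1))             # 0.1  (negative side wraps)
print(angle_normalize(2 * np.pi - 0.1))  # 0.1  (almost a full turn)
print(angle_normalize(np.pi))            # 3.14159...  (farthest possible angle)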

Code example #3
NSTEPS = tc.NSTEPS  # Max episode length
QVALUE_LEARNING_RATE = tc.QVALUE_LEARNING_RATE  # Base learning rate for the Q-value network
POLICY_LEARNING_RATE = tc.POLICY_LEARNING_RATE  # Base learning rate for the policy network
DECAY_RATE = tc.DECAY_RATE  # Discount factor
UPDATE_RATE = tc.UPDATE_RATE  # Soft ("homotopy") rate for the target-network updates
REPLAY_SIZE = tc.REPLAY_SIZE  # Size of the replay buffer
BATCH_SIZE = tc.BATCH_SIZE  # Minibatch size for each stochastic gradient step
NH1 = NH2 = tc.NH1  # Hidden layer sizes (both set from tc.NH1 in this script)
range_esp = tc.range_esp
time_step = tc.time_step

SIM_NUMBER = 1.2

# Load the DDPG policy trained and saved with this simulation number
model = DDPG.load("ddpg_pendulum_stb_baselines_" + str(SIM_NUMBER))

# PyBullet single pendulum used for visual inspection of the learned policy
robot = Robot("single_pendulum.urdf")
robot.sim_number = 1
robot.RANDSET = 0
robot.GUI_ENABLED = 1
robot.SINCOS = 1
path_log = "/home/pasquale/Desktop/thesis/thesis-code/1D_pendulum/stable_baselines/"
robot.time_step = time_step
robot.setupSim()

# Evaluate policy
#env.robot.stopSim()
#env = PendulumPyB()

# Check convergence
# compare
# convergence
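
A minimal sketch of the evaluation step hinted at by the commented-out lines above, assuming the `PendulumPyB` Gym wrapper from code example #1 and the stable-baselines `evaluate_policy` helper (the exact module layout of the original scripts is not shown here):

from stable_baselines.common.evaluation import evaluate_policy

env = PendulumPyB()
mean_reward, std_reward = evaluate_policy(model, env, n_eval_episodes=10)
print('Mean reward: {} +/- {:.2f}'.format(mean_reward, std_reward))
env.robot.stopSim()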
Code example #4
start_time = time.time()
model.learn(total_timesteps=NSTEPS * NEPISODES)  # train the DDPG agent
end_time = time.time()
elapsed_time = end_time - start_time
model.save("ddpg_pendulum_stb_baselines_" + str(SIM_NUMBER))
print('elapsed ' + str(elapsed_time) + ' s')
mean_reward, std_reward = evaluate_policy(model, env, n_eval_episodes=21)
env.robot.stopSim()

#mean_reward, std_reward = evaluate_policy(model, env_eval, n_eval_episodes=10)
#print(f'Mean reward: {mean_reward} +/- {std_reward:.2f}')

### save video
# model = DDPG.load("ddpg_pendulum_stb_baselines")

# Double pendulum (Acrobot) set up with data logging and video recording enabled
robot = Robot("double_pendulum.urdf")
robot.sim_number = SIM_NUMBER
RANDSET = 0
robot.LOGDATA = 1
robot.SINCOS = 1
robot.video_path = "/home/pasquale/Desktop/thesis/thesis-code/2D_Acrobot/stable_baselines/Video"
path_log = "/home/pasquale/Desktop/thesis/thesis-code/2D_Acrobot/stable_baselines/"
robot.time_step = time_step
robot.setupSim()
for i in range(NSTEPS):
    # Double-pendulum observation:
    # [cos q1, sin q1, q1_dot, cos q2, sin q2, q2_dot]
    obs = np.array([
        robot.states_sincos[1][0], robot.states_sincos[1][1],
        robot.states_dot[1][3], robot.states_sincos[2][0],
        robot.states_sincos[2][1], robot.states_dot[2][3]
    ])
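    # --- continuation sketch (not in the original excerpt) ---
    # Query the trained policy; stable-baselines `predict` returns (action, state).
    action, _ = model.predict(obs, deterministic=True)
    # Advance the simulation with the chosen torque. `simulateDyn` is a
    # hypothetical method name; the Robot stepping API is not shown in this excerpt.
    robot.simulateDyn(action)
robot.stopSim()  # not in the original excerpt; mirrors env.robot.stopSim() above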
Code example #5
#   second revolute joint at [0.023, 0., 0.1]
#   center of gravity of link 2 at [-0.0050107, 1.9371E-10, 0.10088]
#
# Since the rotation is about x, and all joint frames (and the link-2 CoG frame)
# share the same orientation, only y and z matter for the length (in practice
# only z, since y is always 0).
#
# The tip of the Acrobot is taken to be the center of gravity of link 2,
# which gives the goal position below.

length = .035 + .01 + 0.1088
goal = np.array([0., 0., length])  # upright tip position (link-2 CoG straight above the base)
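
A hedged sketch of how this goal could be used as a distance term; `tip_position` is a hypothetical placeholder, since how the tip pose is read back from the simulator is not shown in this excerpt:

import numpy as np

def distance_to_goal(tip_position, goal):
    # Euclidean distance between the Acrobot tip (link-2 CoG) and the upright goal.
    return np.linalg.norm(np.asarray(tip_position) - goal)

# Example: a tip hanging straight down is 2 * length away from the upright goal.
print(distance_to_goal([0., 0., -length], goal))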

env = Robot("double_pendulum.urdf")
env_rend = Robot("double_pendulum.urdf", sim_number=SIM_NUMBER)  # for rendering

env.GUI_ENABLED = 0
env.RANDSET = 0
env.SINCOS = 1
env.time_step = time_step
env.actuated_index = [2]  # only the second joint is actuated (Acrobot-style)
env.max_torque = 5.0

env_rend.SINCOS = 1
env_rend.GUI_ENABLED = 1
env_rend.actuated_index = [2]