コード例 #1
0
            rsum += reward
            #print(rsum)
        print(x)
        if verbose: print('Lasted ',i,' timestep -- total reward:',rsum)
    
    signal.signal(signal.SIGTSTP, lambda x,y:rendertrial()) # Roll-out when CTRL-Z is pressed
 

    h_rwd = []
    h_qva = []
    h_ste = []    
 
 
    ##  Envirnonment Configuration ##
    
    env.setupSim() 
    
    
    
    ##  Training Loop ##
    start_time = time.time()
    for episode in range(1,NEPISODES):
        env.resetRobot()
        x = np.array([[env.states_sincos[1][0],env.states_sincos[1][1],env.states_dot[1][3],
                       env.states_sincos[2][0],env.states_sincos[2][1],env.states_dot[2][3]]])
        
        rsum = 0.0 
 
        for step in range(NSTEPS):
            u       = sess.run(policy.policy, feed_dict={ policy.x: x }) # Greedy policy ...
            #u      += np.random.uniform(-range_esp,range_esp) / (1. + episode/epi_expl + step/step_expl )   #add gaussian noise                      # ... with noise
コード例 #2
0
SIM_NUMBER = 1.5
model_save = "/home/pasquale/Desktop/thesis/thesis-code/2D_Acrobot/ddpg/trined_agents/DDPG_saved_" + str(
    SIM_NUMBER) + ".chkpt"

sess = tf.compat.v1.InteractiveSession()
tf.compat.v1.global_variables_initializer().run()
tf.compat.v1.train.Saver().restore(sess, model_save)

robot = Robot("single_pendulum.urdf")
robot.sim_number = SIM_NUMBER
robot.RANDSET = 0
robot.GUI_ENABLED = 0
robot.SINCOS = 1
path_log = "/home/pasquale/Desktop/thesis/thesis-code/1D_pendulum/continuous/"
robot.time_step = time_step
robot.setupSim()

#Evaluate policy
#env.robot.stopSim()
#env = PendulumPyB()

#Check convergence

c = 100000000

h_sum_last = 0

angles_list = []
angles_list.append(angle_normalize(robot.states[1][3]) * 180 / np.pi)
vel_ang_list = []
vel_ang_list.append(robot.states_dot[1][3])
コード例 #3
0
class PendulumPyB(gym.Env):

    metadata = {'render.modes': ['human']}

    def __init__(self):
        self.max_speed = 1000.
        self.max_torque = 2.
        #self.dt = .05
        #self.viewer = None

        self.robot = Robot("single_pendulum.urdf")
        self.robot.time_step = .01
        self.robot.GUI_ENABLED = 0
        self.robot.setupSim()
        self.robot.SINCOS = 1
        self.reward_weights = [1., 0.0, 0.0]

        self.step_count = 0
        self.episode_duration = tc.NSTEPS

        high = np.array([1., 1., self.max_speed], dtype=np.float32)
        self.action_space = spaces.Box(low=-self.max_torque,
                                       high=self.max_torque,
                                       shape=(1, ),
                                       dtype=np.float32)
        self.observation_space = spaces.Box(low=-high,
                                            high=high,
                                            dtype=np.float32)

    def step(self, u):

        #dt = self.dt
        self.step_count += 1
        end_episode = False
        #u = np.clip(u, -self.max_torque, self.max_torque)[0]
        u.tolist()
        self.robot.simulateDyn([u[0]])
        reward = -angle_normalize(
            self.robot.states[1][3]
        )**2 * self.reward_weights[0] - self.robot.states_dot[1][
            3]**2 * self.reward_weights[1] - self.reward_weights[2] * (u**2)

        if not self.step_count % self.episode_duration: end_episode = True
        return self._get_obs(), reward, end_episode, {}

    def reset(self):
        self.robot.resetRobot()
        return self._get_obs()

    def _get_obs(self):
        self.robot.observeState()
        return np.array([
            self.robot.states_sincos[1][0], self.robot.states_sincos[1][1],
            self.robot.states_dot[1][3]
        ])

    """ def render(self, mode='human'):
        if self.viewer is None:
            from gym.envs.classic_control import rendering
            self.viewer = rendering.Viewer(500, 500)
            self.viewer.set_bounds(-2.2, 2.2, -2.2, 2.2)
            rod = rendering.make_capsule(1, .2)
            rod.set_color(.8, .3, .3)
            self.pole_transform = rendering.Transform()
            rod.add_attr(self.pole_transform)
            self.viewer.add_geom(rod)
            axle = rendering.make_circle(.05)
            axle.set_color(0, 0, 0)
            self.viewer.add_geom(axle)
            fname = path.join(path.dirname(__file__), "assets/clockwise.png")
            self.img = rendering.Image(fname, 1., 1.)
            self.imgtrans = rendering.Transform()
            self.img.add_attr(self.imgtrans)

        self.viewer.add_onetime(self.img)
        self.pole_transform.set_rotation(self.state[0] + np.pi / 2)
        if self.last_u:
            self.imgtrans.scale = (-self.last_u / 2, np.abs(self.last_u) / 2)

        return self.viewer.render(return_rgb_array=mode == 'rgb_array') """
    """ def close(self):