Esempio n. 1
0
class Run():
    def __init__(self, config, dir_path):
        self.dir_path = dir_path
        self.config = config
        self.config.load_configuration(dir_path)
        self.config.print_configuration()

        self.PD_freq = self.config.conf['LLC-frequency']
        self.Physics_freq = self.config.conf['Physics-frequency']
        self.network_freq = self.config.conf['HLC-frequency']
        self.sampling_skip = int(self.PD_freq / self.network_freq)

        self.reward_decay = 1.0
        self.reward_scale = config.conf['reward-scale']
        self.reward_scale = self.reward_scale / float(
            self.sampling_skip)  # /10.0#normalizing reward to 1
        self.max_time = 10  #16
        self.max_step_per_episode = int(self.max_time * self.network_freq)

        self.env = Valkyrie(
            max_time=self.max_time,
            renders=True,
            initial_gap_time=0.1,
            PD_freq=self.PD_freq,
            Physics_freq=self.Physics_freq,
            Kp=config.conf['Kp'],
            Kd=config.conf['Kd'],
            bullet_default_PD=config.conf['bullet-default-PD'],
            controlled_joints_list=config.conf['controlled-joints'],
            logFileName=dir_path,
            isEnableSelfCollision=True)

        config.conf['state-dim'] = self.env.stateNumber + 1
        self.agent = Agent(self.env, self.config)
        self.agent.load_weight(dir_path + '/best_network')

        self.logging = logger(dir_path)

        self.episode_count = 0
        self.step_count = 0
        self.train_iter_count = 0

        self.best_reward = 0
        self.best_episode = 0
        self.best_train_iter = 0

        self.control = Control(self.config, self.env)

        # create new network

        self.force = [0, 0, 0]
        self.force_chest = [0, 0,
                            0]  # max(0,force_chest[1]-300*1.0 / EXPLORE)]
        self.force_pelvis = [0, 0, 0]

        # img = [[1,2,3]*50]*100
        img = np.zeros((240, 320, 3))
        self.image = plt.imshow(img, interpolation='none', animated=True)

        self.ax = plt.gca()
        plt.axis('off')

        self.image_list = []

        self.ref_motion = Motion(config=self.config, dsr_gait_freq=0.6)

    def test(self):
        total_reward = 0
        for i in range(self.config.conf['test-num']):  #
            quat = self.ref_motion.euler_to_quat(0, 0, 0)
            # quat = self.ref_motion.euler_to_quat(0, 0.2616, 0)
            _ = self.env._reset(Kp=self.config.conf['Kp'],
                                Kd=self.config.conf['Kd'],
                                base_pos_nom=[0, 0, 1.175],
                                base_orn_nom=quat,
                                fixed_base=False,
                                base_ang_vel_nom=[0, 0, 0])
            # state = self.env._reset(Kp=self.config.conf['Kp'], Kd=self.config.conf['Kd'], base_pos_nom=[0, 0, 1.175], fixed_base=False)
            # q_nom = self.ref_motion.ref_motion_dict()
            # q_dot_nom = self.ref_motion.ref_motion_vel_dict()
            # base_orn_nom = self.ref_motion.get_base_orn()
            # state = self.env._reset(Kp=self.config.conf['Kp'], Kd=self.config.conf['Kd'], q_nom=q_nom, base_orn_nom=base_orn_nom, base_pos_nom=[0, 0, 1.175], fixed_base=False)
            # self.env._setupCamera()
            self.env.startRendering()
            self.env._startLoggingVideo()
            self.ref_motion.reset(index=0)
            # self.ref_motion.random_count()

            self.control.reset(
                w_imitation=self.config.conf['imitation-weight'],
                w_task=self.config.conf['task-weight'])

            for step in range(self.max_step_per_episode):
                # self.env._setupCamera()
                t = time.time()
                gait_phase = self.ref_motion.count / self.ref_motion.dsr_length
                ref_angle = self.ref_motion.ref_joint_angle()
                ref_vel = self.ref_motion.ref_joint_vel()

                state = self.env.getExtendedObservation()
                state = np.squeeze(state)
                state = np.append(state, [gait_phase])
                # state = np.append(state, [np.sin(np.pi * 2 * gait_phase), np.cos(np.pi * 2 * gait_phase)])
                # state = np.append(state,[0,0])

                action, actor_info = self.agent.agent.actor.get_action(state)
                mean = actor_info['mean']
                logstd = actor_info['logstd']
                action = mean
                # action = np.clip(action, self.config.conf['actor-output-bounds'][0],
                #                  self.config.conf['actor-output-bounds'][1])
                action = np.array([action]) if len(
                    np.shape(action)) == 0 else np.array(action)

                f = self.env.rejectableForce_xy(1.0 / self.network_freq)
                rgb = self.env._render(roll=0, pitch=0, yaw=0)
                print(rgb.shape)
                self.image_list.append(rgb)

                # action = self.control.rescale(ref_action, self.config.conf['action-bounds'],
                #                               self.config.conf['actor-output-bounds'])
                self.control.update_ref(ref_angle, ref_vel, [])
                next_state, reward, terminal, info = self.control.control_step(
                    action, self.force, gait_phase)
                self.ref_motion.index_count()

                # if step>= 1*self.network_freq and step< 1.3*self.network_freq:
                #     self.force = [800,0,0]
                # else:
                #     self.force = [0,0,0]

                total_reward += reward

                ob = self.env.getObservation()
                ob_filtered = self.env.getFilteredObservation()
                # for l in range(len(ob)):
                #     self.logging.add_run('observation' + str(l), ob[l])
                #     self.logging.add_run('filtered_observation' + str(l), ob_filtered[l])
                self.logging.add_run('action', action)
                self.logging.add_run('ref_action', ref_angle)
                joint_angle = self.control.get_joint_angle()
                self.logging.add_run('joint_angle', joint_angle)
                self.logging.add_run('phase', gait_phase)
                readings = self.env.getExtendedReading()
                for key, value in readings.items():
                    self.logging.add_run(key, value)
                self.logging.add_run('task_reward', info['task_reward'])
                self.logging.add_run('imitation_reward',
                                     info['imitation_reward'])
                self.logging.add_run('total_reward', info['total_reward'])
                self.logging.add_run('reward_joint_angle',
                                     info['reward_joint_angle'])
                self.logging.add_run('reward_joint_vel',
                                     info['reward_joint_vel'])
                #
                # while 1:
                #     if(time.time()-t)>1.0/self.network_freq:
                #         break

                if terminal:
                    break
            self.env._stopLoggingVideo()
            self.env.stopRendering()

        ave_reward = total_reward / self.config.conf['test-num']
        print(ave_reward)
        self.logging.save_run()

        clip = ImageSequenceClip(self.image_list, fps=25)
        clip.write_gif(self.dir_path + '/test.gif')
        clip.write_videofile(self.dir_path + '/test.mp4', fps=25, audio=False)
Esempio n. 2
0
class Run():
    def __init__(self, config, dir_path):
        self.dir_path = dir_path
        self.config = config

        self.PD_freq = self.config.conf['LLC-frequency']
        self.Physics_freq = self.config.conf['Physics-frequency']
        self.network_freq = self.config.conf['HLC-frequency']
        self.sampling_skip = int(self.PD_freq/self.network_freq)

        self.max_time = 6
        self.max_step_per_episode = int(self.max_time*self.network_freq)

        self.env = Valkyrie(
            max_time=self.max_time, renders=True, initial_gap_time=0.1, PD_freq=self.PD_freq,
            Physics_freq=self.Physics_freq, Kp=config.conf['Kp'], Kd=config.conf['Kd'],
            bullet_default_PD=config.conf['bullet-default-PD'], controlled_joints_list=config.conf['controlled-joints'],
            logFileName=dir_path, isEnableSelfCollision=False)

        config.conf['state-dim'] = self.env.stateNumber+1

        self.logging = logger(dir_path)

        self.episode_count = 0
        self.step_count = 0
        self.train_iter_count = 0

        self.best_reward = 0
        self.best_episode = 0
        self.best_train_iter = 0

        self.control = Control(self.config, self.env)

        # img = [[1,2,3]*50]*100
        img = np.zeros((240,320,3))
        self.image = plt.imshow(img,interpolation='none',animated=True)

        self.ax=plt.gca()
        plt.axis('off')

        self.image_list = []

        self.ref_motion = Motion(config=self.config, dsr_gait_freq=0.6)

    def test(self):
        total_reward = 0
        for i in range(self.config.conf['test-num']):#
            quat = self.ref_motion.euler_to_quat(0,0,0)
            _ = self.env._reset(Kp=self.config.conf['Kp'], Kd=self.config.conf['Kd'], base_pos_nom=[0,0,1.575], base_orn_nom=quat, fixed_base=True)
            # state = self.env._reset(Kp=self.config.conf['Kp'], Kd=self.config.conf['Kd'], base_pos_nom=[0, 0, 1.175], fixed_base=False)
            q_nom = self.ref_motion.ref_motion_dict()
            base_orn_nom = self.ref_motion.get_base_orn()

            # state = self.env._reset(Kp=self.config.conf['Kp'], Kd=self.config.conf['Kd'], q_nom=q_nom, base_orn_nom=base_orn_nom, base_pos_nom=[0, 0, 1.175], fixed_base=False)
            # self.env._setupCamera()
            self.env.startRendering()
            self.env._startLoggingVideo()

            for step in range(self.max_step_per_episode):
                if step>=2*self.network_freq and step<4*self.network_freq:
                    action = [0,0,0,0,0,0,0,0,0.1,0,0]
                else:
                    action = [0,0,0,0,0,0,0,0,0,0,0]
                # action = np.clip(action, self.config.conf['actor-output-bounds'][0],
                #                  self.config.conf['actor-output-bounds'][1])
                action = np.array([action]) if len(np.shape(action)) == 0 else np.array(action)

                rgb=self.env._render(roll=0,pitch=0,yaw=90)
                print(rgb.shape)
                self.image_list.append(rgb)
                for i in range(self.sampling_skip):
                    # action = self.control.rescale(ref_action, self.config.conf['action-bounds'],
                    #                               self.config.conf['actor-output-bounds'])
                    _,_,_,_ = self.env._step(action)

                    self.logging.add_run('action', action)

                    joint_angle = self.control.get_joint_angle()
                    self.logging.add_run('joint_angle', joint_angle)
                    readings = self.env.getExtendedReading()
                    ob = self.env.getObservation()
                    for l in range(len(ob)):
                        self.logging.add_run('observation' + str(l), ob[l])
                    # for key, value in readings.items():
                    #     self.logging.add_run(key, value)

            self.env._stopLoggingVideo()
            self.env.stopRendering()

        ave_reward = total_reward/self.config.conf['test-num']
        print(ave_reward)
        self.logging.save_run()


        clip = ImageSequenceClip(self.image_list, fps=25)
        clip.write_gif(self.dir_path+'/test.gif')
        clip.write_videofile(self.dir_path+'/test.mp4', fps=25, audio=False)