Example #1
0
    def __init__(self, config, dir_path):
        self.dir_path = dir_path
        self.config = config
        self.config.load_configuration(dir_path)
        self.config.print_configuration()

        self.PD_freq = self.config.conf['LLC-frequency']
        self.Physics_freq = self.config.conf['Physics-frequency']
        self.network_freq = self.config.conf['HLC-frequency']
        self.sampling_skip = int(self.PD_freq / self.network_freq)

        self.reward_decay = 1.0
        self.reward_scale = config.conf['reward-scale']
        self.reward_scale = self.reward_scale / float(
            self.sampling_skip)  # /10.0#normalizing reward to 1
        self.max_time = 10  #16
        self.max_step_per_episode = int(self.max_time * self.network_freq)

        self.env = Valkyrie(
            max_time=self.max_time,
            renders=True,
            initial_gap_time=0.1,
            PD_freq=self.PD_freq,
            Physics_freq=self.Physics_freq,
            Kp=config.conf['Kp'],
            Kd=config.conf['Kd'],
            bullet_default_PD=config.conf['bullet-default-PD'],
            controlled_joints_list=config.conf['controlled-joints'],
            logFileName=dir_path)

        config.conf['state-dim'] = self.env.stateNumber
        self.agent = Agent(self.env, self.config)
        self.agent.load_weight(dir_path + '/best_network')

        self.logging = logger(dir_path)

        self.episode_count = 0
        self.step_count = 0
        self.train_iter_count = 0

        self.best_reward = 0
        self.best_episode = 0
        self.best_train_iter = 0

        self.control = Control(self.config, self.env)

        # create new network

        self.force = [0, 0, 0]
        self.force_period = [5 * self.network_freq, 5 * self.network_freq + 3]
        self.force_chest = [0, 0,
                            0]  # max(0,force_chest[1]-300*1.0 / EXPLORE)]
        self.force_pelvis = [0, 0, 0]

        self.image_list = []
Example #2
0
    def __init__(self, config, dir_path):
        self.dir_path = dir_path
        self.config = config

        self.PD_freq = self.config.conf['LLC-frequency']
        self.Physics_freq = self.config.conf['Physics-frequency']
        self.network_freq = self.config.conf['HLC-frequency']
        self.sampling_skip = int(self.PD_freq/self.network_freq)

        self.max_time = 6
        self.max_step_per_episode = int(self.max_time*self.network_freq)

        self.env = Valkyrie(
            max_time=self.max_time, renders=True, initial_gap_time=0.1, PD_freq=self.PD_freq,
            Physics_freq=self.Physics_freq, Kp=config.conf['Kp'], Kd=config.conf['Kd'],
            bullet_default_PD=config.conf['bullet-default-PD'], controlled_joints_list=config.conf['controlled-joints'],
            logFileName=dir_path, isEnableSelfCollision=False)

        config.conf['state-dim'] = self.env.stateNumber+1

        self.logging = logger(dir_path)

        self.episode_count = 0
        self.step_count = 0
        self.train_iter_count = 0

        self.best_reward = 0
        self.best_episode = 0
        self.best_train_iter = 0

        self.control = Control(self.config, self.env)

        # img = [[1,2,3]*50]*100
        img = np.zeros((240,320,3))
        self.image = plt.imshow(img,interpolation='none',animated=True)

        self.ax=plt.gca()
        plt.axis('off')

        self.image_list = []

        self.ref_motion = Motion(config=self.config, dsr_gait_freq=0.6)
Example #3
0
    def __init__(self, config):
        self.config = config

        self.PD_freq = self.config.conf['LLC-frequency']
        self.Physics_freq = self.config.conf['Physics-frequency']
        self.network_freq = self.config.conf['HLC-frequency']
        self.sampling_skip = int(self.PD_freq / self.network_freq)

        self.reward_decay = 1.0
        self.reward_scale = config.conf['reward-scale']
        self.reward_scale = self.reward_scale / float(
            self.sampling_skip)  # /10.0#normalizing reward to 1

        self.max_time_per_train_episode = self.config.conf['max-train-time']
        self.max_step_per_train_episode = int(self.max_time_per_train_episode *
                                              self.network_freq)
        self.max_time_per_test_episode = self.config.conf['max-test-time']  #16
        self.max_step_per_test_episode = int(self.max_time_per_test_episode *
                                             self.network_freq)
        self.train_external_force_disturbance = True
        if self.train_external_force_disturbance == True:
            path_str = 'with_external_force_disturbance/'
        else:
            path_str = 'without_external_force_disturbance/'
        self.test_external_force_disturbance = True

        self.env = Valkyrie(
            max_time=self.max_time_per_train_episode,
            renders=False,
            initial_gap_time=0.5,
            PD_freq=self.PD_freq,
            Physics_freq=self.Physics_freq,
            Kp=config.conf['Kp'],
            Kd=config.conf['Kd'],
            bullet_default_PD=config.conf['bullet-default-PD'],
            controlled_joints_list=config.conf['controlled-joints'])

        config.conf['state-dim'] = self.env.stateNumber
        self.agent = Agent(self.env, self.config)

        self.episode_count = 0
        self.step_count = 0
        self.train_iter_count = 0

        self.best_reward = 0
        self.best_episode = 0
        self.best_train_iter = 0

        self.control = Control(self.config, self.env)

        # load weight from previous network
        # dir_path = 'record/2017_12_04_15.20.44/no_force'  # '2017_05_29_18.23.49/with_force'

        # create new network
        dir_path = 'TRPO/record/' + '3D_push/' + path_str + datetime.now(
        ).strftime('%Y_%m_%d_%H.%M.%S')
        if not os.path.exists(dir_path):
            os.makedirs(dir_path)
        if not os.path.exists(dir_path + '/saved_actor_networks'):
            os.makedirs(dir_path + '/saved_actor_networks')
        if not os.path.exists(dir_path + '/saved_critic_networks'):
            os.makedirs(dir_path + '/saved_critic_networks')
        self.logging = logger(dir_path)
        config.save_configuration(dir_path)
        config.record_configuration(dir_path)
        config.print_configuration()
        self.agent.load_weight(dir_path)
        self.dir_path = dir_path

        self.on_policy_paths = []
        self.off_policy_paths = []
        self.buffer = ReplayBuffer(self.config.conf['replay-buffer-size'])

        self.force = [0, 0, 0]
        self.force_chest = [0, 0,
                            0]  # max(0,force_chest[1]-300*1.0 / EXPLORE)]
        self.force_pelvis = [0, 0, 0]
Example #4
0
    def __init__(self, config, dir_path):
        self.dir_path = dir_path
        self.config = config
        self.config.load_configuration(dir_path)
        self.config.print_configuration()

        self.PD_freq = self.config.conf['LLC-frequency']
        self.Physics_freq = self.config.conf['Physics-frequency']
        self.network_freq = self.config.conf['HLC-frequency']
        self.sampling_skip = int(self.PD_freq / self.network_freq)

        self.reward_decay = 1.0
        self.reward_scale = config.conf['reward-scale']
        self.reward_scale = self.reward_scale / float(
            self.sampling_skip)  # /10.0#normalizing reward to 1
        self.max_time = 10  #16
        self.max_step_per_episode = int(self.max_time * self.network_freq)

        self.env = Valkyrie(
            max_time=self.max_time,
            renders=False,
            initial_gap_time=0.5,
            PD_freq=self.PD_freq,
            Physics_freq=self.Physics_freq,
            Kp=config.conf['Kp'],
            Kd=config.conf['Kd'],
            bullet_default_PD=config.conf['bullet-default-PD'],
            controlled_joints_list=config.conf['controlled-joints'],
            logFileName=dir_path,
            isEnableSelfCollision=False)

        config.conf['state-dim'] = self.env.stateNumber + 2
        self.agent = Agent(self.env, self.config)
        self.agent.load_weight(dir_path + '/best_network')

        self.logging = logger(dir_path)

        self.episode_count = 0
        self.step_count = 0
        self.train_iter_count = 0

        self.best_reward = 0
        self.best_episode = 0
        self.best_train_iter = 0

        self.control = Control(self.config, self.env)

        # create new network

        self.force = [0, 0, 0]
        self.force_chest = [0, 0,
                            0]  # max(0,force_chest[1]-300*1.0 / EXPLORE)]
        self.force_pelvis = [0, 0, 0]

        # img = [[1,2,3]*50]*100
        img = np.zeros((240, 320, 3))
        self.image = plt.imshow(img, interpolation='none', animated=True)

        self.ax = plt.gca()
        plt.axis('off')

        self.image_list = []

        self.ref_motion = Motion(config=self.config, dsr_gait_freq=0.6)