def __init__(self, config, dir_path):
    self.dir_path = dir_path
    self.config = config
    self.config.load_configuration(dir_path)
    self.config.print_configuration()
    # control-loop frequencies: low-level PD, physics simulation, and
    # high-level network, all read from the loaded configuration
    self.PD_freq = self.config.conf['LLC-frequency']
    self.Physics_freq = self.config.conf['Physics-frequency']
    self.network_freq = self.config.conf['HLC-frequency']
    # number of PD sub-steps executed per high-level network step
    self.sampling_skip = int(self.PD_freq / self.network_freq)
    self.reward_decay = 1.0
    self.reward_scale = config.conf['reward-scale']
    # normalize the reward accumulated over the sub-steps to roughly 1
    self.reward_scale = self.reward_scale / float(self.sampling_skip)
    self.max_time = 10  # episode length in seconds
    self.max_step_per_episode = int(self.max_time * self.network_freq)
    self.env = Valkyrie(
        max_time=self.max_time,
        renders=True,
        initial_gap_time=0.1,
        PD_freq=self.PD_freq,
        Physics_freq=self.Physics_freq,
        Kp=config.conf['Kp'],
        Kd=config.conf['Kd'],
        bullet_default_PD=config.conf['bullet-default-PD'],
        controlled_joints_list=config.conf['controlled-joints'],
        logFileName=dir_path)
    config.conf['state-dim'] = self.env.stateNumber
    self.agent = Agent(self.env, self.config)
    self.agent.load_weight(dir_path + '/best_network')
    self.logging = logger(dir_path)
    self.episode_count = 0
    self.step_count = 0
    self.train_iter_count = 0
    self.best_reward = 0
    self.best_episode = 0
    self.best_train_iter = 0
    self.control = Control(self.config, self.env)
    # external push disturbance: window starts at t = 5 s and lasts 3 HLC steps
    self.force = [0, 0, 0]
    self.force_period = [5 * self.network_freq, 5 * self.network_freq + 3]
    self.force_chest = [0, 0, 0]
    self.force_pelvis = [0, 0, 0]
    self.image_list = []
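
# Hedged sketch (not from the original source): illustrates the arithmetic
# behind sampling_skip, the reward normalization, and the force_period window
# set above. The frequency and scale values are assumptions chosen only for
# the example; the real ones come from the loaded configuration.
def _example_timing_and_reward_scaling():
    PD_freq = 500.0      # assumed LLC frequency (Hz)
    network_freq = 25.0  # assumed HLC frequency (Hz)
    reward_scale = 1.0   # assumed config value
    sampling_skip = int(PD_freq / network_freq)         # 20 PD sub-steps per HLC step
    reward_scale = reward_scale / float(sampling_skip)  # 0.05 per sub-step
    # the push window starts at t = 5 s and lasts 3 HLC steps (0.12 s here)
    force_period = [5 * network_freq, 5 * network_freq + 3]  # [125.0, 128.0]
    return sampling_skip, reward_scale, force_period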
def __init__(self, config, dir_path):
    self.dir_path = dir_path
    self.config = config
    # control-loop frequencies: low-level PD, physics simulation, and
    # high-level network, all read from the configuration
    self.PD_freq = self.config.conf['LLC-frequency']
    self.Physics_freq = self.config.conf['Physics-frequency']
    self.network_freq = self.config.conf['HLC-frequency']
    # number of PD sub-steps executed per high-level network step
    self.sampling_skip = int(self.PD_freq / self.network_freq)
    self.max_time = 6  # episode length in seconds
    self.max_step_per_episode = int(self.max_time * self.network_freq)
    self.env = Valkyrie(
        max_time=self.max_time,
        renders=True,
        initial_gap_time=0.1,
        PD_freq=self.PD_freq,
        Physics_freq=self.Physics_freq,
        Kp=config.conf['Kp'],
        Kd=config.conf['Kd'],
        bullet_default_PD=config.conf['bullet-default-PD'],
        controlled_joints_list=config.conf['controlled-joints'],
        logFileName=dir_path,
        isEnableSelfCollision=False)
    # one extra state dimension on top of the raw robot state
    config.conf['state-dim'] = self.env.stateNumber + 1
    self.logging = logger(dir_path)
    self.episode_count = 0
    self.step_count = 0
    self.train_iter_count = 0
    self.best_reward = 0
    self.best_episode = 0
    self.best_train_iter = 0
    self.control = Control(self.config, self.env)
    # placeholder frame for the live matplotlib view; updated during playback
    img = np.zeros((240, 320, 3))
    self.image = plt.imshow(img, interpolation='none', animated=True)
    self.ax = plt.gca()
    plt.axis('off')
    self.image_list = []
    # reference gait used to drive / evaluate the controller
    self.ref_motion = Motion(config=self.config, dsr_gait_freq=0.6)
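
# Hedged sketch (not from the original source): the imshow-update pattern the
# image attributes above are set up for. It reuses the module's numpy and
# matplotlib imports; the (240, 320, 3) frame shape matches the placeholder,
# and the random frame stands in for a rendered camera image.
def _example_imshow_update():
    img = np.zeros((240, 320, 3))
    artist = plt.imshow(img, interpolation='none', animated=True)
    plt.axis('off')
    frame = np.random.rand(240, 320, 3)  # stand-in for a rendered frame
    artist.set_data(frame)  # reuse the artist instead of re-plotting each step
    plt.pause(1.0 / 25.0)   # assumed ~25 Hz HLC refresh rate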
def __init__(self, config):
    self.config = config
    # control-loop frequencies: low-level PD, physics simulation, and
    # high-level network, all read from the configuration
    self.PD_freq = self.config.conf['LLC-frequency']
    self.Physics_freq = self.config.conf['Physics-frequency']
    self.network_freq = self.config.conf['HLC-frequency']
    # number of PD sub-steps executed per high-level network step
    self.sampling_skip = int(self.PD_freq / self.network_freq)
    self.reward_decay = 1.0
    self.reward_scale = config.conf['reward-scale']
    # normalize the reward accumulated over the sub-steps to roughly 1
    self.reward_scale = self.reward_scale / float(self.sampling_skip)
    self.max_time_per_train_episode = self.config.conf['max-train-time']
    self.max_step_per_train_episode = int(
        self.max_time_per_train_episode * self.network_freq)
    self.max_time_per_test_episode = self.config.conf['max-test-time']
    self.max_step_per_test_episode = int(
        self.max_time_per_test_episode * self.network_freq)
    # toggle external push disturbances during training / testing
    self.train_external_force_disturbance = True
    if self.train_external_force_disturbance:
        path_str = 'with_external_force_disturbance/'
    else:
        path_str = 'without_external_force_disturbance/'
    self.test_external_force_disturbance = True
    self.env = Valkyrie(
        max_time=self.max_time_per_train_episode,
        renders=False,
        initial_gap_time=0.5,
        PD_freq=self.PD_freq,
        Physics_freq=self.Physics_freq,
        Kp=config.conf['Kp'],
        Kd=config.conf['Kd'],
        bullet_default_PD=config.conf['bullet-default-PD'],
        controlled_joints_list=config.conf['controlled-joints'])
    config.conf['state-dim'] = self.env.stateNumber
    self.agent = Agent(self.env, self.config)
    self.episode_count = 0
    self.step_count = 0
    self.train_iter_count = 0
    self.best_reward = 0
    self.best_episode = 0
    self.best_train_iter = 0
    self.control = Control(self.config, self.env)
    # either load weights from a previous run, e.g.:
    # dir_path = 'record/2017_12_04_15.20.44/no_force'  # '2017_05_29_18.23.49/with_force'
    # ...or create a fresh, timestamped run directory for a new network:
    dir_path = ('TRPO/record/' + '3D_push/' + path_str
                + datetime.now().strftime('%Y_%m_%d_%H.%M.%S'))
    if not os.path.exists(dir_path):
        os.makedirs(dir_path)
    if not os.path.exists(dir_path + '/saved_actor_networks'):
        os.makedirs(dir_path + '/saved_actor_networks')
    if not os.path.exists(dir_path + '/saved_critic_networks'):
        os.makedirs(dir_path + '/saved_critic_networks')
    self.logging = logger(dir_path)
    config.save_configuration(dir_path)
    config.record_configuration(dir_path)
    config.print_configuration()
    self.agent.load_weight(dir_path)
    self.dir_path = dir_path
    self.on_policy_paths = []
    self.off_policy_paths = []
    self.buffer = ReplayBuffer(self.config.conf['replay-buffer-size'])
    self.force = [0, 0, 0]
    self.force_chest = [0, 0, 0]
    self.force_pelvis = [0, 0, 0]
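
# Hedged sketch (not from the original source): mirrors the run-directory
# logic above with assumed inputs, making the resulting tree explicit:
#
#   TRPO/record/3D_push/with_external_force_disturbance/<timestamp>/
#       saved_actor_networks/
#       saved_critic_networks/
def _example_make_run_dirs(with_force=True):
    path_str = ('with_external_force_disturbance/' if with_force
                else 'without_external_force_disturbance/')
    dir_path = ('TRPO/record/3D_push/' + path_str
                + datetime.now().strftime('%Y_%m_%d_%H.%M.%S'))
    for sub in ('', '/saved_actor_networks', '/saved_critic_networks'):
        if not os.path.exists(dir_path + sub):
            os.makedirs(dir_path + sub)
    return dir_path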
def __init__(self, config, dir_path):
    self.dir_path = dir_path
    self.config = config
    self.config.load_configuration(dir_path)
    self.config.print_configuration()
    # control-loop frequencies: low-level PD, physics simulation, and
    # high-level network, all read from the loaded configuration
    self.PD_freq = self.config.conf['LLC-frequency']
    self.Physics_freq = self.config.conf['Physics-frequency']
    self.network_freq = self.config.conf['HLC-frequency']
    # number of PD sub-steps executed per high-level network step
    self.sampling_skip = int(self.PD_freq / self.network_freq)
    self.reward_decay = 1.0
    self.reward_scale = config.conf['reward-scale']
    # normalize the reward accumulated over the sub-steps to roughly 1
    self.reward_scale = self.reward_scale / float(self.sampling_skip)
    self.max_time = 10  # episode length in seconds
    self.max_step_per_episode = int(self.max_time * self.network_freq)
    self.env = Valkyrie(
        max_time=self.max_time,
        renders=False,
        initial_gap_time=0.5,
        PD_freq=self.PD_freq,
        Physics_freq=self.Physics_freq,
        Kp=config.conf['Kp'],
        Kd=config.conf['Kd'],
        bullet_default_PD=config.conf['bullet-default-PD'],
        controlled_joints_list=config.conf['controlled-joints'],
        logFileName=dir_path,
        isEnableSelfCollision=False)
    # two extra state dimensions on top of the raw robot state
    config.conf['state-dim'] = self.env.stateNumber + 2
    self.agent = Agent(self.env, self.config)
    self.agent.load_weight(dir_path + '/best_network')
    self.logging = logger(dir_path)
    self.episode_count = 0
    self.step_count = 0
    self.train_iter_count = 0
    self.best_reward = 0
    self.best_episode = 0
    self.best_train_iter = 0
    self.control = Control(self.config, self.env)
    # external push disturbance forces, initially zero
    self.force = [0, 0, 0]
    self.force_chest = [0, 0, 0]
    self.force_pelvis = [0, 0, 0]
    # placeholder frame for the live matplotlib view; updated during playback
    img = np.zeros((240, 320, 3))
    self.image = plt.imshow(img, interpolation='none', animated=True)
    self.ax = plt.gca()
    plt.axis('off')
    self.image_list = []
    # reference gait used to drive / evaluate the controller
    self.ref_motion = Motion(config=self.config, dsr_gait_freq=0.6)
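
# Hedged sketch (assumption, not from the original source): one plausible use
# of the two extra state dimensions reserved above is a cyclic encoding of the
# reference-gait phase appended to the raw robot state. The helper below is
# purely illustrative; the original code does not define it.
def _example_augment_state(raw_state, gait_phase):
    # gait_phase in [0, 1); sin/cos keeps the signal continuous across the
    # phase wrap-around
    return np.concatenate([raw_state,
                           [np.sin(2.0 * np.pi * gait_phase),
                            np.cos(2.0 * np.pi * gait_phase)]])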