def __init__(self):
    """
    Set up the PPO trainer: simulation environment, policy and muscle
    networks, their Adam optimizers, replay buffers, and per-slave
    episode buffers. Finishes by resetting all slave environments.
    """
    # Seed NumPy from wall-clock time so each run differs.
    np.random.seed(seed=int(time.time()))

    self.env = Env(600)
    self.num_slaves = 16

    # Dimensions queried from the environment.
    self.num_state = self.env.GetNumState()
    self.num_action = self.env.GetNumAction()
    self.num_dofs = self.env.GetNumDofs()
    self.num_muscles = self.env.GetNumMuscles()

    # Training schedule and bookkeeping counters.
    self.num_epochs = 10
    self.num_epochs_muscle = 3
    self.num_evaluation = 0
    self.num_tuple_so_far = 0
    self.num_episode = 0
    self.num_tuple = 0

    # Control timing: physics sub-steps per control step.
    self.num_simulation_Hz = self.env.GetSimulationHz()
    self.num_control_Hz = self.env.GetControlHz()
    self.num_simulation_per_control = self.num_simulation_Hz // self.num_control_Hz

    # PPO hyper-parameters (gamma = discount, lb = GAE lambda).
    self.gamma = 0.95
    self.lb = 0.95
    self.buffer_size = 2048
    self.batch_size = 128
    self.muscle_batch_size = 128

    self.replay_buffer = ReplayBuffer(30000)
    self.muscle_buffer = MuscleBuffer(self.buffer_size * 4)

    # Policy/value network and muscle-activation network
    # (moved to GPU when CUDA is available).
    self.model = SimulationNN(self.num_state, self.num_action)
    self.muscle_model = MuscleNN(self.env.GetNumTotalMuscleRelatedDofs(), self.num_dofs - 6, self.num_muscles)
    if use_cuda:
        self.model.cuda()
        self.muscle_model.cuda()

    # Both optimizers share the same learning rate here.
    self.default_learning_rate = 1E-4
    self.default_clip_ratio = 0.2
    self.learning_rate = self.default_learning_rate
    self.clip_ratio = self.default_clip_ratio
    self.optimizer = optim.Adam(self.model.parameters(), lr=self.learning_rate)
    self.optimizer_muscle = optim.Adam(self.muscle_model.parameters(), lr=self.learning_rate)
    self.max_iteration = 50000

    self.w_entropy = 0.0001

    # Per-iteration statistics.
    self.loss_actor = 0.0
    self.loss_critic = 0.0
    self.loss_muscle = 0.0
    self.rewards = []
    self.sum_return = 0.0
    self.max_return = -1.0
    self.max_return_epoch = 1
    self.tic = time.time()

    # One episode buffer per slave; then start all slaves from a fresh state.
    self.episodes = [EpisodeBuffer() for _ in range(self.num_slaves)]
    self.env.Resets(True)
def worker(meta_file, proc_num, state_sender, result_sender, action_receiver, reset_receiver, sample_receiver): """ :type meta_file: str :type proc_num: int :type result_sender: Connection :type state_sender: Connection :type action_receiver: Connection :type reset_receiver: Connection :type sample_receiver: Connection :return: """ # reset variable # 0 : go on (no reset) # 1 : reset # 2 : reset with marginal samples env = Env(meta_file, proc_num) current_path = os.path.dirname( os.path.abspath(__file__)) + '/pushrecoverybvhgenerator' origmot = bvf.readBvhFile_JointMotion( current_path + '/data/walk_simple.bvh', 1.) jed.alignMotionToOrigin(origmot) state = None while True: reset_flag = reset_receiver.recv() if reset_flag == 2: marginal_sample = sample_receiver.recv() env.SetMarginalSampled(marginal_sample[0], marginal_sample[1]) if reset_flag == 1 or reset_flag == 2: env.Reset1() if env.IsWalkingParamChange(): walking_param = env.GetWalkingParams() bvh_str = bvh_generator_server.get_paramed_bvh_walk( origmot, walking_param[0], walking_param[1], walking_param[2], scale=1.) env.SetBvhStr(bvh_str) env.Reset2(True) state = env.GetState() state_sender.send(state) action = action_receiver.recv() env.SetAction(action) env.StepsAtOnce() state = env.GetState() reward = env.GetReward() is_done = env.IsEndOfEpisode() result_sender.send((reward, is_done, proc_num))
def __init__(self,num_slaves):
    """
    Build the trainer for `num_slaves` parallel simulation slaves:
    environment, policy/muscle networks, optimizers and replay buffers.
    """
    # Seed NumPy from wall-clock time so each run differs.
    np.random.seed(seed=int(time.time()))

    self.env = Env(num_slaves)
    self.num_slaves = num_slaves

    # Dimensions queried from the environment.
    self.num_state = self.env.GetNumState()
    self.num_action = self.env.GetNumAction()
    self.num_dofs = self.env.GetNumDofs()
    self.num_muscles = self.env.GetNumMuscles()
    self.num_total_muscle_related_dofs = self.env.GetNumTotalMuscleRelatedDofs()

    # Training schedule and bookkeeping counters.
    self.num_epochs = 20
    self.num_epochs_muscle = 10
    self.num_evaluation = 0
    self.num_tuple_so_far = 0
    self.num_episode = 0
    self.num_tuple = 0

    # Control timing: physics sub-steps per control step.
    self.num_simulation_Hz = self.env.GetSimulationHz()
    self.num_control_Hz = self.env.GetControlHz()
    self.num_simulation_per_control = self.num_simulation_Hz // self.num_control_Hz

    self.use_muscle_nn = True

    # PPO hyper-parameters (gamma = discount, lb = GAE lambda).
    self.gamma = 0.99
    self.lb = 0.95
    self.clip_ratio = 0.2
    self.buffer_size = 2048
    self.batch_size = 128
    self.muscle_batch_size = 128

    self.replay_buffer = ReplayBuffer(30000)
    self.muscle_buffer = MuscleBuffer(self.buffer_size * 4)

    # Networks (moved to GPU when CUDA is available).
    self.model = SimulationNN(self.num_state, self.num_action)
    self.muscle_model = MuscleNN(self.num_total_muscle_related_dofs, self.num_dofs - 6, self.num_muscles)
    if use_cuda:
        self.model.cuda()
        self.muscle_model.cuda()

    # Note: the muscle network trains at half the policy's learning rate.
    self.optimizer = optim.Adam(self.model.parameters(), lr=1E-4)
    self.optimizer_muscle = optim.Adam(self.muscle_model.parameters(), lr=5E-5)

    self.w_entropy = 0.0
    self.alpha = 1.0
    self.alpha_decay = 200.0

    # Per-iteration statistics (losses kept as lists here, unlike the
    # scalar accumulators used by the other trainer variants).
    self.loss_actor = []
    self.loss_critic = []
    self.loss_muscle = []
    self.rewards = []
    self.sum_return = 0.0
    self.threshold = 6.0
    self.current_avg_reward = 0.0
    self.tic = time.time()
def __init__(self, meta_file, num_slaves=16):
    """
    Build the PPO trainer from a metadata file, including the adaptive
    sampling / marginal-value machinery, then spawn the slave processes
    (via init_envs) that communicate over the pipe lists set up here.
    """
    # Seed NumPy from wall-clock time so each run differs.
    np.random.seed(seed=int(time.time()))

    self.num_slaves = num_slaves
    self.meta_file = meta_file
    # NOTE(review): -1 appears to mark the master's own Env instance
    # (slaves get their proc_num) — confirm against worker().
    self.env = Env(meta_file, -1)

    # Dimensions and flags queried from the environment.
    self.use_muscle = self.env.UseMuscle()
    self.num_state = self.env.GetNumState()
    self.num_action = self.env.GetNumAction()
    self.num_muscles = self.env.GetNumMuscles()

    # Training schedule and bookkeeping counters.
    self.num_epochs = 10
    self.num_epochs_muscle = 3
    self.num_evaluation = 0
    self.num_tuple_so_far = 0
    self.num_episode = 0
    self.num_tuple = 0

    # Control timing: physics sub-steps per control step.
    self.num_simulation_Hz = self.env.GetSimulationHz()
    self.num_control_Hz = self.env.GetControlHz()
    self.num_simulation_per_control = self.num_simulation_Hz // self.num_control_Hz

    # PPO hyper-parameters (gamma = discount, lb = GAE lambda).
    self.gamma = 0.95
    self.lb = 0.99
    self.buffer_size = 8192
    self.batch_size = 256
    self.muscle_batch_size = 128

    self.replay_buffer = ReplayBuffer(30000)
    self.muscle_buffer = MuscleBuffer(30000)

    # Networks (moved to GPU when CUDA is available).
    self.model = SimulationNN(self.num_state, self.num_action)
    self.muscle_model = MuscleNN(self.env.GetNumTotalMuscleRelatedDofs(), self.num_action, self.num_muscles)
    if use_cuda:
        self.model.cuda()
        self.muscle_model.cuda()

    # Both optimizers share the same learning rate here.
    self.default_learning_rate = 1E-4
    self.default_clip_ratio = 0.2
    self.learning_rate = self.default_learning_rate
    self.clip_ratio = self.default_clip_ratio
    self.optimizer = optim.Adam(self.model.parameters(), lr=self.learning_rate)
    self.optimizer_muscle = optim.Adam(self.muscle_model.parameters(), lr=self.learning_rate)
    self.max_iteration = 50000

    self.w_entropy = -0.001

    # Per-iteration statistics.
    self.loss_actor = 0.0
    self.loss_critic = 0.0
    self.loss_muscle = 0.0
    self.rewards = []
    self.sum_return = 0.0
    self.max_return = -1.0
    self.max_return_epoch = 1
    self.tic = time.time()

    # --- Adaptive sampling / marginal value training -------------------
    self.use_adaptive_sampling = self.env.UseAdaptiveSampling()
    self.marginal_state_num = self.env.GetMarginalStateNum()
    self.marginal_buffer = MargianlBuffer(30000)  # (sic) project class name
    self.marginal_model = MarginalNN(self.marginal_state_num)
    self.marginal_value_avg = 1.
    self.marginal_learning_rate = 1e-3
    if use_cuda:
        self.marginal_model.cuda()
    self.marginal_optimizer = optim.SGD(self.marginal_model.parameters(), lr=self.marginal_learning_rate)
    self.marginal_loss = 0.0
    self.marginal_samples = []
    self.marginal_sample_cumulative_prob = []
    self.marginal_sample_num = 2000
    self.marginal_k = self.env.GetMarginalParameter()
    # MCMC settings used when drawing marginal samples.
    self.mcmc_burn_in = 1000
    self.mcmc_period = 20

    self.total_episodes = []

    # --- IPC endpoints, one entry per slave once init_envs() runs ------
    self.state_sender = []  # type: list[Connection]
    self.result_sender = []  # type: list[Connection]
    self.state_receiver = []  # type: list[Connection]
    self.result_receiver = []  # type: list[Connection]
    self.action_sender = []  # type: list[Connection]
    self.reset_sender = []  # type: list[Connection]
    self.marginal_sample_sender = []  # type: list[Connection]
    self.envs = []  # type: list[Process]

    self.init_envs()
    self.idx = 0