Example #1
def worker(meta_file, proc_num, state_sender, result_sender, action_receiver, reset_receiver, sample_receiver):
    """

    :type meta_file: str
    :type proc_num: int
    :type result_sender: Connection
    :type state_sender: Connection
    :type action_receiver: Connection
    :type reset_receiver: Connection
    :type sample_receiver: Connection
    :return:
    """

    # reset variable
    # 0 : go on (no reset)
    # 1 : reset
    # 2 : reset with marginal samples

    env = Env(meta_file, proc_num)

    current_path = os.path.dirname(os.path.abspath(__file__)) + '/pushrecoverybvhgenerator'
    origmot = bvf.readBvhFile_JointMotion(current_path+'/data/walk_simple.bvh', 1.)
    jed.alignMotionToOrigin(origmot)

    state = None
    while True:
        reset_flag = reset_receiver.recv()

        if reset_flag == 2:
            marginal_sample = sample_receiver.recv()
            env.SetMarginalSampled(marginal_sample[0], marginal_sample[1])

        if reset_flag == 1 or reset_flag == 2:
            env.Reset1()
            if env.IsWalkingParamChange():
                walking_param = env.GetWalkingParams()
                bvh_str = bvh_generator_server.get_paramed_bvh_walk(origmot, walking_param[0], walking_param[1], walking_param[2], scale=1.)
                env.SetBvhStr(bvh_str)
            env.Reset2(True)
            state = env.GetState()

        state_sender.send(state)
        action = action_receiver.recv()
        env.SetAction(action)
        env.StepsAtOnce()
        state = env.GetState()
        reward = env.GetReward()
        is_done = env.IsEndOfEpisode()
        result_sender.send((reward, is_done, proc_num))
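
The driver side of the pipe protocol this worker expects is sketched below; it is a minimal, hypothetical example (the metadata path and action dimension are placeholders, and `worker` is assumed to be importable): send a reset flag, read the state, send an action, then read back the (reward, is_done, proc_num) tuple.

# Minimal, hypothetical driver for a single worker (sketch only).
from multiprocessing import Pipe, Process

import numpy as np

state_s, state_r = Pipe()      # worker -> driver : states
result_s, result_r = Pipe()    # worker -> driver : (reward, is_done, proc_num)
action_s, action_r = Pipe()    # driver -> worker : actions
reset_s, reset_r = Pipe()      # driver -> worker : reset flag (0 / 1 / 2)
sample_s, sample_r = Pipe()    # driver -> worker : marginal samples (flag 2 only)

p = Process(target=worker,
            args=('meta.txt', 0, state_s, result_s, action_r, reset_r, sample_r))
p.start()

num_action = 0                 # in a real driver this would come from Env(meta_file, -1).GetNumAction()
reset_s.send(1)                # 1 : plain reset
state = state_r.recv()
action_s.send(np.zeros(num_action))
reward, is_done, proc_num = result_r.recv()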
Example #2
    def __init__(self, meta_file, num_slaves=16):
        # plt.ion()
        np.random.seed(seed=int(time.time()))
        self.num_slaves = num_slaves
        self.meta_file = meta_file
        self.env = Env(meta_file, -1)
        self.use_muscle = self.env.UseMuscle()
        self.num_state = self.env.GetNumState()
        self.num_action = self.env.GetNumAction()
        self.num_muscles = self.env.GetNumMuscles()

        self.num_epochs = 10
        self.num_epochs_muscle = 3
        self.num_evaluation = 0
        self.num_tuple_so_far = 0
        self.num_episode = 0
        self.num_tuple = 0
        self.num_simulation_Hz = self.env.GetSimulationHz()
        self.num_control_Hz = self.env.GetControlHz()
        self.num_simulation_per_control = self.num_simulation_Hz // self.num_control_Hz

        self.gamma = 0.95
        self.lb = 0.99

        self.buffer_size = 8192
        self.batch_size = 256
        self.muscle_batch_size = 128
        self.replay_buffer = ReplayBuffer(30000)
        self.muscle_buffer = MuscleBuffer(30000)

        self.model = SimulationNN(self.num_state,self.num_action)

        self.muscle_model = MuscleNN(self.env.GetNumTotalMuscleRelatedDofs(),self.num_action,self.num_muscles)

        if use_cuda:
            self.model.cuda()
            self.muscle_model.cuda()

        self.default_learning_rate = 1E-4
        self.default_clip_ratio = 0.2
        self.learning_rate = self.default_learning_rate
        self.clip_ratio = self.default_clip_ratio
        self.optimizer = optim.Adam(self.model.parameters(),lr=self.learning_rate)
        self.optimizer_muscle = optim.Adam(self.muscle_model.parameters(),lr=self.learning_rate)
        self.max_iteration = 50000

        self.w_entropy = -0.001

        self.loss_actor = 0.0
        self.loss_critic = 0.0
        self.loss_muscle = 0.0
        self.rewards = []
        self.sum_return = 0.0
        self.max_return = -1.0
        self.max_return_epoch = 1
        self.tic = time.time()

        # for adaptive sampling, marginal value training
        self.use_adaptive_sampling = self.env.UseAdaptiveSampling()
        self.marginal_state_num = self.env.GetMarginalStateNum()
        self.marginal_buffer = MargianlBuffer(30000)
        self.marginal_model = MarginalNN(self.marginal_state_num)
        self.marginal_value_avg = 1.
        self.marginal_learning_rate = 1e-3
        if use_cuda:
            self.marginal_model.cuda()
        self.marginal_optimizer = optim.SGD(self.marginal_model.parameters(), lr=self.marginal_learning_rate)
        self.marginal_loss = 0.0
        self.marginal_samples = []
        self.marginal_sample_cumulative_prob = []
        self.marginal_sample_num = 2000
        self.marginal_k = self.env.GetMarginalParameter()
        self.mcmc_burn_in = 1000
        self.mcmc_period = 20

        self.total_episodes = []

        self.state_sender = []  # type: list[Connection]
        self.result_sender = []  # type: list[Connection]
        self.state_receiver = []  # type: list[Connection]
        self.result_receiver = []  # type: list[Connection]
        self.action_sender = []  # type: list[Connection]
        self.reset_sender = []  # type: list[Connection]
        self.marginal_sample_sender = []  # type: list[Connection]
        self.envs = []  # type: list[Process]

        self.init_envs()
        self.idx = 0
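
Here `gamma` and `lb` are the discount factor and GAE lambda used by the class's `ComputeTDandGAE` (shown in Example #4 below). A standalone sketch of that advantage recursion on made-up rewards and values looks like this:

# Toy illustration of the GAE recursion with gamma = 0.95, lb = 0.99
# (made-up rewards/values; not part of the class).
import numpy as np

gamma, lb = 0.95, 0.99
rewards = np.array([1.0, 1.0, 1.0])
values = np.array([2.0, 1.5, 1.0, 0.0])   # V(s_0), V(s_1), V(s_2), plus a terminal 0

advantages = np.zeros(len(rewards))
ad_t = 0.0
for i in reversed(range(len(rewards))):
    delta = rewards[i] + gamma * values[i + 1] - values[i]
    ad_t = delta + gamma * lb * ad_t
    advantages[i] = ad_t

td_targets = values[:len(rewards)] + advantages   # regression targets for the critic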
Example #3
    def __init__(self, params, metadata_dir, nn_finding_dir=None):
        self.is_muscle, self.is_pushed_during_training, self.is_multi_seg_foot, self.is_walking_variance, self.is_walking_param_normal_trained, self.crouch = \
            params

        option = ''
        option += 'muscle_' if self.is_muscle else 'torque_'
        option += 'push_' if self.is_pushed_during_training else 'nopush_'
        option += 'msf_' if self.is_multi_seg_foot else 'sf_'

        assert self.crouch in ['0', '20', '30', '60', 'all']
        if self.crouch != 'all':
            option += 'crouch'
        option += self.crouch
        option += '_mean'
        if self.is_walking_variance:
            option += '_var_'
            option += 'normal' if self.is_walking_param_normal_trained else 'uniform'

        nn_dir = None
        if nn_finding_dir is not None:
            nn_dir = glob.glob(nn_finding_dir + option)[0]

        self.env = EnvWrapper(metadata_dir + option + '.txt')
        num_state = self.env.GetNumState()
        num_action = self.env.GetNumAction()
        num_actions = self.env.GetNumAction()

        self.nn_module = None

        if nn_dir is not None:
            self.nn_module = SimulationNN(num_state, num_action)
            self.nn_module.load(nn_dir + '/max.pt')

        self.muscle_nn_module = None

        if self.is_muscle and nn_dir is not None:
            num_total_muscle_related_dofs = self.env.GetNumTotalMuscleRelatedDofs()
            num_muscles = self.env.GetNumMuscles()
            self.muscle_nn_module = MuscleNN(num_total_muscle_related_dofs,
                                             num_actions, num_muscles)
            self.muscle_nn_module.load(nn_dir + '/max_muscle.pt')

        self.walk_fsm = WalkFSM()
        self.push_step = 8
        self.push_duration = .2
        self.push_force = 50.
        self.push_start_timing = 50.

        self.step_length_ratio = 1.
        self.walk_speed_ratio = 1.
        self.duration_ratio = 1.

        self.info_start_time = 0.
        self.info_end_time = 0.
        self.info_root_pos = []
        self.info_left_foot_pos = []
        self.info_right_foot_pos = []

        self.push_start_time = 30.
        self.push_end_time = 0.
        self.walking_dir = np.zeros(3)

        self.pushed_step = 0
        self.pushed_length = 0

        self.max_detour_length = 0.
        self.max_detour_step_count = 0

        self.valid = True
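
For reference, the option string assembled above resolves to names like the following (the directory layout follows the constructor; the concrete parameter tuples are just illustrative examples):

# params = (is_muscle, is_pushed_during_training, is_multi_seg_foot,
#           is_walking_variance, is_walking_param_normal_trained, crouch)
# (True,  True,  False, True,  True,  '0')    -> 'muscle_push_sf_crouch0_mean_var_normal'
# (False, False, True,  False, False, 'all')  -> 'torque_nopush_msf_all_mean'
# Metadata is read from metadata_dir + option + '.txt',
# networks from glob.glob(nn_finding_dir + option)[0] + '/max.pt'.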
Example #4
class PPO(object):
    def __init__(self, meta_file, num_slaves=16):
        # plt.ion()
        np.random.seed(seed=int(time.time()))
        self.num_slaves = num_slaves
        self.meta_file = meta_file
        self.env = Env(meta_file, -1)
        self.use_muscle = self.env.UseMuscle()
        self.num_state = self.env.GetNumState()
        self.num_action = self.env.GetNumAction()
        self.num_muscles = self.env.GetNumMuscles()

        self.num_epochs = 10
        self.num_epochs_muscle = 3
        self.num_evaluation = 0
        self.num_tuple_so_far = 0
        self.num_episode = 0
        self.num_tuple = 0
        self.num_simulation_Hz = self.env.GetSimulationHz()
        self.num_control_Hz = self.env.GetControlHz()
        self.num_simulation_per_control = self.num_simulation_Hz // self.num_control_Hz

        self.gamma = 0.95
        self.lb = 0.99

        self.buffer_size = 8192
        self.batch_size = 256
        self.muscle_batch_size = 128
        self.replay_buffer = ReplayBuffer(30000)
        self.muscle_buffer = MuscleBuffer(30000)

        self.model = SimulationNN(self.num_state,self.num_action)

        self.muscle_model = MuscleNN(self.env.GetNumTotalMuscleRelatedDofs(),self.num_action,self.num_muscles)

        if use_cuda:
            self.model.cuda()
            self.muscle_model.cuda()

        self.default_learning_rate = 1E-4
        self.default_clip_ratio = 0.2
        self.learning_rate = self.default_learning_rate
        self.clip_ratio = self.default_clip_ratio
        self.optimizer = optim.Adam(self.model.parameters(),lr=self.learning_rate)
        self.optimizer_muscle = optim.Adam(self.muscle_model.parameters(),lr=self.learning_rate)
        self.max_iteration = 50000

        self.w_entropy = -0.001

        self.loss_actor = 0.0
        self.loss_critic = 0.0
        self.loss_muscle = 0.0
        self.rewards = []
        self.sum_return = 0.0
        self.max_return = -1.0
        self.max_return_epoch = 1
        self.tic = time.time()

        # for adaptive sampling, marginal value training
        self.use_adaptive_sampling = self.env.UseAdaptiveSampling()
        self.marginal_state_num = self.env.GetMarginalStateNum()
        self.marginal_buffer = MargianlBuffer(30000)
        self.marginal_model = MarginalNN(self.marginal_state_num)
        self.marginal_value_avg = 1.
        self.marginal_learning_rate = 1e-3
        if use_cuda:
            self.marginal_model.cuda()
        self.marginal_optimizer = optim.SGD(self.marginal_model.parameters(), lr=self.marginal_learning_rate)
        self.marginal_loss = 0.0
        self.marginal_samples = []
        self.marginal_sample_cumulative_prob = []
        self.marginal_sample_num = 2000
        self.marginal_k = self.env.GetMarginalParameter()
        self.mcmc_burn_in = 1000
        self.mcmc_period = 20

        self.total_episodes = []

        self.state_sender = []  # type: list[Connection]
        self.result_sender = []  # type: list[Connection]
        self.state_receiver = []  # type: list[Connection]
        self.result_receiver = []  # type: list[Connection]
        self.action_sender = []  # type: list[Connection]
        self.reset_sender = []  # type: list[Connection]
        self.marginal_sample_sender = []  # type: list[Connection]
        self.envs = []  # type: list[Process]

        self.init_envs()
        self.idx = 0

    def init_envs(self):
        for slave_idx in range(self.num_slaves):
            s_s, s_r = Pipe()
            r_s, r_r = Pipe()
            a_s, a_r = Pipe()
            reset_s, reset_r = Pipe()
            marginal_s, marginal_r = Pipe()
            p = Process(target=worker, args=(self.meta_file, slave_idx, s_s, r_s, a_r, reset_r, marginal_r))
            self.state_sender.append(s_s)
            self.result_sender.append(r_s)
            self.state_receiver.append(s_r)
            self.result_receiver.append(r_r)
            self.action_sender.append(a_s)
            self.reset_sender.append(reset_s)
            self.marginal_sample_sender.append(marginal_s)
            self.envs.append(p)
            p.start()

    def reinit_env(self, slave_idx):
        print('reinit_env: ', slave_idx)
        if self.envs[slave_idx].is_alive():
            self.envs[slave_idx].terminate()
        s_s, s_r = Pipe()
        r_s, r_r = Pipe()
        a_s, a_r = Pipe()
        reset_s, reset_r = Pipe()
        marginal_s, marginal_r = Pipe()
        p = Process(target=worker, args=(self.meta_file, slave_idx, s_s, r_s, a_r, reset_r, marginal_r))
        self.state_sender[slave_idx] = s_s
        self.result_sender[slave_idx] = r_s
        self.state_receiver[slave_idx] = s_r
        self.result_receiver[slave_idx] = r_r
        self.action_sender[slave_idx] = a_s
        self.reset_sender[slave_idx] = reset_s
        self.marginal_sample_sender[slave_idx] = marginal_s
        self.envs[slave_idx] = p
        p.start()

    def envs_get_states(self, terminated):
        states = []
        for recv_idx in range(len(self.state_receiver)):
            if terminated[recv_idx]:
                states.append([0.] * self.num_state)
            else:
                states.append(self.state_receiver[recv_idx].recv())
        return states

    def envs_send_actions(self, actions, terminated):
        for i in range(len(self.action_sender)):
            if not terminated[i]:
                self.action_sender[i].send(actions[i])

    def envs_get_status(self, terminated):
        status = [None for _ in range(self.num_slaves)]  # type: list[tuple[float|None, bool]]

        alive_receivers = [self.result_receiver[recv_idx] for recv_idx, x in enumerate(terminated) if not x]

        for receiver in alive_receivers:
            if receiver.poll(20):
                recv_data = receiver.recv()
                status[recv_data[2]] = (recv_data[0], recv_data[1])

        for j in range(len(status)):
            if terminated[j]:
                status[j] = (0., True)
            elif status[j] is None:
                # assertion error, reinit in GenerateTransition
                status[j] = (None, True)

        # status = []
        # for recv_idx in range(len(self.result_receiver)):
        #     if terminated[recv_idx]:
        #         status.append((0., True))
        #     else:
        #         status.append(self.result_receiver[recv_idx].recv())
        return zip(*status)

    def envs_resets(self, reset_flag):
        for i in range(len(self.reset_sender)):
            self.reset_sender[i].send(reset_flag)

    def envs_reset(self, i, reset_flag):
        self.reset_sender[i].send(reset_flag)

    def SaveModel(self):
        self.model.save('../nn/current.pt')
        if self.use_muscle:
            self.muscle_model.save('../nn/current_muscle.pt')
        if self.use_adaptive_sampling:
            self.marginal_model.save('../nn/current_marginal.pt')

        if self.max_return_epoch == self.num_evaluation:
            self.model.save('../nn/max.pt')
            if self.use_muscle:
                self.muscle_model.save('../nn/max_muscle.pt')
            if self.use_adaptive_sampling:
                self.marginal_model.save('../nn/max_marginal.pt')
        if self.num_evaluation % 100 == 0:
            self.model.save('../nn/'+str(self.num_evaluation//100)+'.pt')
            if self.use_muscle:
                self.muscle_model.save('../nn/'+str(self.num_evaluation//100)+'_muscle.pt')
            if self.use_adaptive_sampling:
                self.marginal_model.save('../nn/'+str(self.num_evaluation//100)+'_marginal.pt')

    def LoadModel(self, path):
        self.model.load('../nn/'+path+'.pt')
        if self.use_muscle:
            self.muscle_model.load('../nn/'+path+'_muscle.pt')
        if self.use_adaptive_sampling:
            self.marginal_model.load('../nn/'+path+'_marginal.pt')

    def ComputeTDandGAE(self):
        self.replay_buffer.Clear()
        self.muscle_buffer.Clear()
        self.marginal_buffer.Clear()
        self.sum_return = 0.0
        for epi in self.total_episodes:
            data = epi.GetData()
            size = len(data)
            if size == 0:
                continue
            states, actions, rewards, values, logprobs = zip(*data)

            values = np.concatenate((values, np.zeros(1)), axis=0)
            advantages = np.zeros(size)
            ad_t = 0

            epi_return = 0.0
            for i in reversed(range(len(data))):
                epi_return += rewards[i]
                delta = rewards[i] + values[i+1] * self.gamma - values[i]
                ad_t = delta + self.gamma * self.lb * ad_t
                advantages[i] = ad_t
            self.sum_return += epi_return
            TD = values[:size] + advantages

            for i in range(size):
                self.replay_buffer.Push(states[i], actions[i], logprobs[i], TD[i], advantages[i])
            
            if self.use_adaptive_sampling:
                for i in range(size):
                    self.marginal_buffer.Push(states[i][-self.marginal_state_num:], values[i])

        self.num_episode = len(self.total_episodes)
        self.num_tuple = len(self.replay_buffer.buffer)
        # print('SIM : {}'.format(self.num_tuple))
        self.num_tuple_so_far += self.num_tuple

        if self.use_muscle:
            muscle_tuples = self.env.GetMuscleTuples()
            for i in range(len(muscle_tuples)):
                self.muscle_buffer.Push(muscle_tuples[i][0],muscle_tuples[i][1],muscle_tuples[i][2],muscle_tuples[i][3])

    def SampleStatesForMarginal(self):
        # MCMC : Metropolis-Hastings
        _marginal_samples = []
        marginal_sample_prob = []
        marginal_sample_cumulative_prob = []
        p_sb = 0.
        mcmc_idx = 0
        while len(_marginal_samples) < self.marginal_sample_num:
            # Generation
            state_sb_prime = self.env.SampleMarginalState()
            
            # Evaluation
            marginal_value = self.marginal_model(Tensor(state_sb_prime)).cpu().detach().numpy().reshape(-1)
            # print(marginal_value, state_sb_prime)
            p_sb_prime = math.exp(self.marginal_k * (1. - marginal_value/self.marginal_value_avg) )

            # Rejection
            if p_sb_prime > np.random.rand() * p_sb:
                if mcmc_idx > self.mcmc_burn_in:
                    _marginal_samples.append(state_sb_prime)
                    marginal_sample_prob.append(p_sb_prime)
                p_sb = p_sb_prime
                mcmc_idx += 1

        sorted_y_idx_list = sorted(range(len(marginal_sample_prob)), key=lambda x: marginal_sample_prob[x])
        marginal_samples = [_marginal_samples[i] for i in sorted_y_idx_list]
        marginal_sample_prob.sort()

        marginal_sample_cumulative_prob.append(marginal_sample_prob[0])

        for i in range(1, len(marginal_sample_prob)):
            marginal_sample_cumulative_prob.append(marginal_sample_prob[i] + marginal_sample_cumulative_prob[-1])

        for i in range(len(marginal_sample_cumulative_prob)):
            marginal_sample_cumulative_prob[i] = marginal_sample_cumulative_prob[i]/marginal_sample_cumulative_prob[-1]

        # print(self.marginal_value_avg, sum(marginal_sample_cumulative_prob))
        # plt.figure(0)
        # plt.clf()
        # stride_idx = len(marginal_samples[0])-2
        # speed_idx = len(marginal_samples[0])-1
        # xx = []
        # yy = []
        #
        # for marginal_sample in marginal_samples:
        #     marginal_sample_exact = marginal_sample.copy()
        #     marginal_sample_exact[stride_idx] *= math.sqrt(0.00323409929)
        #     marginal_sample_exact[speed_idx] *= math.sqrt(0.00692930964)
        #     marginal_sample_exact[stride_idx] += 1.12620703
        #     marginal_sample_exact[speed_idx] += 0.994335964
        #
        #     xx.append(marginal_sample_exact[stride_idx])
        #     yy.append(marginal_sample_exact[speed_idx])
        #
        # plt.scatter(xx, yy)
        #
        # # plt.xlim(left=-3., right=3.)
        # # plt.ylim(bottom=-3., top=3.)
        # plt.xlim(left=0., right=2.)
        # plt.ylim(bottom=0., top=2.)
        # plt.show()
        # plt.pause(0.001)
        # self.env.SetMarginalSampled(np.asarray(marginal_samples), marginal_sample_cumulative_prob)

        self.marginal_samples = np.asarray(marginal_samples)
        self.marginal_sample_cumulative_prob = marginal_sample_cumulative_prob

    def GenerateTransitions(self):
        del self.total_episodes[:]
        episodes = [EpisodeBuffer() for _ in range(self.num_slaves)]

        if self.use_adaptive_sampling and (self.idx % self.mcmc_period == 0):
            self.envs_resets(2)
            for i in range(len(self.marginal_sample_sender)):
                self.marginal_sample_sender[i].send((self.marginal_samples, self.marginal_sample_cumulative_prob))
        else:
            self.envs_resets(1)

        local_step = 0
        terminated = [False] * self.num_slaves
        counter = 0
        while local_step < self.buffer_size or not all(terminated):
            counter += 1
            # if counter % 10 == 0:
            #     print('SIM : {}'.format(local_step),end='\r')

            states = self.envs_get_states(terminated)

            a_dist, v = self.model(Tensor(states))
            actions = a_dist.sample().cpu().detach().numpy()
            # actions = a_dist.loc.cpu().detach().numpy()
            logprobs = a_dist.log_prob(Tensor(actions)).cpu().detach().numpy().reshape(-1)
            values = v.cpu().detach().numpy().reshape(-1)

            self.envs_send_actions(actions, terminated)
            if self.use_muscle:
                mt = Tensor(self.env.GetMuscleTorques())
                for _ in range(self.num_simulation_per_control//2):
                    dt = Tensor(self.env.GetDesiredTorques())
                    activations = self.muscle_model(mt,dt).cpu().detach().numpy()
                    self.env.SetActivationLevels(activations)

                    self.env.Steps(2)
            else:
                self.env.StepsAtOnce()

            rewards, is_done = self.envs_get_status(terminated)
            for j in range(self.num_slaves):
                if terminated[j]:
                    continue

                assertion_occur = rewards[j] is None
                nan_occur = np.any(np.isnan(states[j])) or np.any(np.isnan(actions[j])) or np.any(np.isnan(values[j])) or np.any(np.isnan(logprobs[j]))

                if not is_done[j] and not nan_occur and not assertion_occur:
                    episodes[j].Push(states[j], actions[j], rewards[j], values[j], logprobs[j])
                    local_step += 1

                if assertion_occur:
                    self.reinit_env(j)

                if is_done[j] or nan_occur:
                    self.total_episodes.append(episodes[j])

                    if local_step < self.buffer_size:
                        episodes[j] = EpisodeBuffer()
                        self.envs_reset(j, 1)
                    else:
                        terminated[j] = True
                else:
                    self.envs_reset(j, 0)

    def OptimizeSimulationNN(self):
        all_transitions = np.array(self.replay_buffer.buffer)
        for j in range(self.num_epochs):
            np.random.shuffle(all_transitions)
            for i in range(len(all_transitions)//self.batch_size):
                transitions = all_transitions[i*self.batch_size:(i+1)*self.batch_size]
                batch = Transition(*zip(*transitions))

                stack_s = np.vstack(batch.s).astype(np.float32)
                stack_a = np.vstack(batch.a).astype(np.float32)
                stack_lp = np.vstack(batch.logprob).astype(np.float32)
                stack_td = np.vstack(batch.TD).astype(np.float32)
                stack_gae = np.vstack(batch.GAE).astype(np.float32)

                a_dist,v = self.model(Tensor(stack_s))
                '''Critic Loss'''
                loss_critic = ((v-Tensor(stack_td)).pow(2)).mean()

                '''Actor Loss'''
                ratio = torch.exp(a_dist.log_prob(Tensor(stack_a))-Tensor(stack_lp))
                stack_gae = (stack_gae-stack_gae.mean())/(stack_gae.std()+ 1E-5)
                stack_gae = Tensor(stack_gae)
                surrogate1 = ratio * stack_gae
                surrogate2 = torch.clamp(ratio,min =1.0-self.clip_ratio,max=1.0+self.clip_ratio) * stack_gae
                loss_actor = - torch.min(surrogate1,surrogate2).mean()
                '''Entropy Loss'''
                loss_entropy = - self.w_entropy * a_dist.entropy().mean()

                self.loss_actor = loss_actor.cpu().detach().numpy().tolist()
                self.loss_critic = loss_critic.cpu().detach().numpy().tolist()

                loss = loss_actor + loss_entropy + loss_critic

                self.optimizer.zero_grad()
                loss.backward(retain_graph=True)
                for param in self.model.parameters():
                    if param.grad is not None:
                        param.grad.data.clamp_(-0.5,0.5)
                self.optimizer.step()
            # print('Optimizing sim nn : {}/{}'.format(j+1,self.num_epochs),end='\r')
        # print('')

    def OptimizeMuscleNN(self):
        muscle_transitions = np.array(self.muscle_buffer.buffer)
        for j in range(self.num_epochs_muscle):
            np.random.shuffle(muscle_transitions)
            for i in range(len(muscle_transitions)//self.muscle_batch_size):
                tuples = muscle_transitions[i*self.muscle_batch_size:(i+1)*self.muscle_batch_size]
                batch = MuscleTransition(*zip(*tuples))

                stack_JtA = np.vstack(batch.JtA).astype(np.float32)
                stack_tau_des = np.vstack(batch.tau_des).astype(np.float32)
                stack_L = np.vstack(batch.L).astype(np.float32)

                stack_L = stack_L.reshape(self.muscle_batch_size,self.num_action,self.num_muscles)
                stack_b = np.vstack(batch.b).astype(np.float32)

                stack_JtA = Tensor(stack_JtA)
                stack_tau_des = Tensor(stack_tau_des)
                stack_L = Tensor(stack_L)
                stack_b = Tensor(stack_b)

                activation = self.muscle_model(stack_JtA,stack_tau_des)
                tau = torch.einsum('ijk,ik->ij',(stack_L,activation)) + stack_b

                loss_reg = (activation).pow(2).mean()
                loss_target = (((tau-stack_tau_des)/100.0).pow(2)).mean()

                loss = 0.01*loss_reg + loss_target
                # loss = loss_target

                self.optimizer_muscle.zero_grad()
                loss.backward(retain_graph=True)
                for param in self.muscle_model.parameters():
                    if param.grad is not None:
                        param.grad.data.clamp_(-0.5,0.5)
                self.optimizer_muscle.step()

            # print('Optimizing muscle nn : {}/{}'.format(j+1,self.num_epochs_muscle),end='\r')
        self.loss_muscle = loss.cpu().detach().numpy().tolist()
        # print('')

    def OptimizeMarginalNN(self):
        marginal_transitions = np.array(self.marginal_buffer.buffer)
        for j in range(self.num_epochs):
            np.random.shuffle(marginal_transitions)
            for i in range(len(marginal_transitions)//self.batch_size):
                transitions = marginal_transitions[i*self.batch_size:(i+1)*self.batch_size]
                batch = MarginalTransition(*zip(*transitions))

                stack_sb = np.vstack(batch.sb).astype(np.float32)
                stack_v = np.vstack(batch.v).astype(np.float32)
                
                v = self.marginal_model(Tensor(stack_sb))
                
                # Marginal Loss
                loss_marginal = ((v-Tensor(stack_v)).pow(2)).mean()
                self.marginal_loss = loss_marginal.cpu().detach().numpy().tolist()
                self.marginal_optimizer.zero_grad()
                loss_marginal.backward(retain_graph=True)

                for param in self.marginal_model.parameters():
                    if param.grad is not None:
                        param.grad.data.clamp_(-0.5, 0.5)
                self.marginal_optimizer.step()

                # Marginal value average
                avg_marginal = Tensor(stack_v).mean().cpu().detach().numpy().tolist()
                self.marginal_value_avg -= self.marginal_learning_rate * (self.marginal_value_avg - avg_marginal)

            # print('Optimizing margin nn : {}/{}'.format(j+1, self.num_epochs), end='\r')
        # print('')

    def OptimizeModel(self):
        self.ComputeTDandGAE()
        self.OptimizeSimulationNN()
        if self.use_muscle:
            self.OptimizeMuscleNN()
        if self.use_adaptive_sampling:
            self.OptimizeMarginalNN()

    def Train(self):
        if self.use_adaptive_sampling and (self.idx % self.mcmc_period == 0):
            self.SampleStatesForMarginal()
        self.GenerateTransitions()
        self.OptimizeModel()
        self.idx += 1

    def Evaluate(self):
        self.num_evaluation = self.num_evaluation + 1
        h = int((time.time() - self.tic)//3600.0)
        m = int((time.time() - self.tic)//60.0)
        s = int((time.time() - self.tic))
        s = s - m*60
        m = m - h*60
        if self.num_episode == 0:
            self.num_episode = 1
        if self.num_tuple == 0:
            self.num_tuple = 1
        if self.max_return < self.sum_return/self.num_episode:
            self.max_return = self.sum_return/self.num_episode
            self.max_return_epoch = self.num_evaluation
        with open('../nn/log.txt', 'a') as f:
            f.write('# {} === {}h:{}m:{}s ===\n'.format(self.num_evaluation,h,m,s))
            f.write('||Loss Actor               : {:.4f}\n'.format(self.loss_actor))
            f.write('||Loss Critic              : {:.4f}\n'.format(self.loss_critic))
            if self.use_muscle:
                f.write('||Loss Muscle              : {:.4f}\n'.format(self.loss_muscle))
            if self.use_adaptive_sampling:
                f.write('||Loss Marginal            : {:.4f}\n'.format(self.marginal_loss))
            f.write('||Noise                    : {:.3f}\n'.format(self.model.log_std.exp().mean()))
            f.write('||Num Transition So far    : {}\n'.format(self.num_tuple_so_far))
            f.write('||Num Transition           : {}\n'.format(self.num_tuple))
            f.write('||Num Episode              : {}\n'.format(self.num_episode))
            f.write('||Avg Return per episode   : {:.3f}\n'.format(self.sum_return/self.num_episode))
            f.write('||Avg Reward per transition: {:.3f}\n'.format(self.sum_return/self.num_tuple))
            f.write('||Avg Step per episode     : {:.1f}\n'.format(self.num_tuple/self.num_episode))
            f.write('||Max Avg Return So far    : {:.3f} at #{}\n'.format(self.max_return,self.max_return_epoch))
            f.write('=============================================\n')
        self.rewards.append(self.sum_return/self.num_episode)

        self.SaveModel()

        return np.array(self.rewards)
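
A minimal sketch of how this trainer might be driven (the metadata path is a placeholder):

# Hypothetical entry point (sketch only).
ppo = PPO('../data/metadata.txt', num_slaves=16)
# ppo.LoadModel('current')            # optionally resume from ../nn/current*.pt

for _ in range(ppo.max_iteration):
    ppo.Train()                       # generate transitions, then PPO / muscle / marginal updates
    rewards = ppo.Evaluate()          # log to ../nn/log.txt, save checkpoints, return avg-return history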
Example #5
class PushSim(object):
    def __init__(self, params, metadata_dir, nn_finding_dir=None):
        self.is_muscle, self.is_pushed_during_training, self.is_multi_seg_foot, self.is_walking_variance, self.is_walking_param_normal_trained, self.crouch = \
            params

        option = ''
        option += 'muscle_' if self.is_muscle else 'torque_'
        option += 'push_' if self.is_pushed_during_training else 'nopush_'
        option += 'msf_' if self.is_multi_seg_foot else 'sf_'

        assert self.crouch in ['0', '20', '30', '60', 'all']
        if self.crouch != 'all':
            option += 'crouch'
        option += self.crouch
        option += '_mean'
        if self.is_walking_variance:
            option += '_var_'
            option += 'normal' if self.is_walking_param_normal_trained else 'uniform'

        nn_dir = None
        if nn_finding_dir is not None:
            nn_dir = glob.glob(nn_finding_dir + option)[0]

        self.env = EnvWrapper(metadata_dir + option + '.txt')
        num_state = self.env.GetNumState()
        num_action = self.env.GetNumAction()
        num_actions = self.env.GetNumAction()

        self.nn_module = None

        if nn_dir is not None:
            self.nn_module = SimulationNN(num_state, num_action)
            self.nn_module.load(nn_dir + '/max.pt')

        self.muscle_nn_module = None

        if self.is_muscle and nn_dir is not None:
            num_total_muscle_related_dofs = self.env.GetNumTotalMuscleRelatedDofs()
            num_muscles = self.env.GetNumMuscles()
            self.muscle_nn_module = MuscleNN(num_total_muscle_related_dofs,
                                             num_actions, num_muscles)
            self.muscle_nn_module.load(nn_dir + '/max_muscle.pt')

        self.walk_fsm = WalkFSM()
        self.push_step = 8
        self.push_duration = .2
        self.push_force = 50.
        self.push_start_timing = 50.

        self.step_length_ratio = 1.
        self.walk_speed_ratio = 1.
        self.duration_ratio = 1.

        self.info_start_time = 0.
        self.info_end_time = 0.
        self.info_root_pos = []
        self.info_left_foot_pos = []
        self.info_right_foot_pos = []

        self.push_start_time = 30.
        self.push_end_time = 0.
        self.walking_dir = np.zeros(3)

        self.pushed_step = 0
        self.pushed_length = 0

        self.max_detour_length = 0.
        self.max_detour_step_count = 0

        self.valid = True

    def GetActionFromNN(self):
        return self.nn_module.get_action(self.env.GetState())

    def GetActivationFromNN(self, mt):
        if not self.is_muscle:
            self.env.GetDesiredTorques()
            return np.zeros(self.env.GetNumMuscles())

        dt = self.env.GetDesiredTorques()
        return self.muscle_nn_module.get_activation(mt, dt)

    def step(self):
        num = self.env.GetSimulationHz() // self.env.GetControlHz()
        action = self.GetActionFromNN() if self.nn_module is not None else np.zeros(self.env.GetNumAction())
        self.env.SetAction(action)
        if self.is_muscle:
            inference_per_sim = 2
            for i in range(0, num, inference_per_sim):
                mt = self.env.GetMuscleTorques()
                self.env.SetActivationLevels(self.GetActivationFromNN(mt))
                for j in range(inference_per_sim):
                    if self.push_start_time <= self.env.GetSimulationTime() <= self.push_end_time:
                        self.env.AddBodyExtForce(
                            "ArmL", np.array([self.push_force, 0., 0.]))

                    self.env.Step()
        else:
            if self.push_start_time <= self.env.GetSimulationTime() <= self.push_end_time:
                self.env.AddBodyExtForce("ArmL",
                                         np.array([self.push_force, 0., 0.]))
            self.env.Step()

    def reset(self, rsi=True):
        self.env.Reset(rsi)
        self.env.PrintWalkingParamsSampled()

    def simulate(self):
        self.env.PrintWalkingParamsSampled()
        self.info_start_time = 0.
        self.info_end_time = 0.
        self.info_root_pos = []
        self.info_left_foot_pos = []
        self.info_right_foot_pos = []

        self.push_start_time = 30.
        self.push_end_time = 0.
        self.walking_dir = np.zeros(3)

        self.pushed_step = 0
        self.pushed_length = 0

        self.valid = True

        self.walk_fsm.reset()

        self.max_detour_length = 0.
        self.max_detour_step_count = 0

        while True:
            bool_l = self.env.IsBodyContact(
                "TalusL") or self.env.IsBodyContact(
                    "FootThumbL") or self.env.IsBodyContact("FootPinkyL")
            bool_r = self.env.IsBodyContact(
                "TalusR") or self.env.IsBodyContact(
                    "FootThumbR") or self.env.IsBodyContact("FootPinkyR")
            last_step_count = copy.deepcopy(self.walk_fsm.step_count)
            self.walk_fsm.check(bool_l, bool_r)

            if last_step_count == self.walk_fsm.step_count - 1:
                print(last_step_count, '->', self.walk_fsm.step_count,
                      self.env.GetSimulationTime())

            if last_step_count == 3 and self.walk_fsm.step_count == 4:
                self.info_start_time = self.env.GetSimulationTime()
                self.info_root_pos.append(self.env.GetBodyPosition("Pelvis"))
                self.info_root_pos[0][1] = 0.
                if self.walk_fsm.last_sw == 'r':
                    self.info_right_foot_pos.append(
                        self.env.GetBodyPosition("TalusR"))
                elif self.walk_fsm.last_sw == 'l':
                    self.info_left_foot_pos.append(
                        self.env.GetBodyPosition("TalusL"))

            if last_step_count == 4 and self.walk_fsm.step_count == 5:
                # info_root_pos.append(self.env.GetBodyPosition("Pelvis"))
                if self.walk_fsm.last_sw == 'r':
                    self.info_right_foot_pos.append(
                        self.env.GetBodyPosition("TalusR"))
                elif self.walk_fsm.last_sw == 'l':
                    self.info_left_foot_pos.append(
                        self.env.GetBodyPosition("TalusL"))

            if last_step_count == 5 and self.walk_fsm.step_count == 6:
                # info_root_pos.append(self.env.GetBodyPosition("Pelvis"))
                if self.walk_fsm.last_sw == 'r':
                    self.info_right_foot_pos.append(
                        self.env.GetBodyPosition("TalusR"))
                elif self.walk_fsm.last_sw == 'l':
                    self.info_left_foot_pos.append(
                        self.env.GetBodyPosition("TalusL"))

            if last_step_count == 6 and self.walk_fsm.step_count == 7:
                # info_root_pos.append(self.env.GetBodyPosition("Pelvis"))
                if self.walk_fsm.last_sw == 'r':
                    self.info_right_foot_pos.append(
                        self.env.GetBodyPosition("TalusR"))
                elif self.walk_fsm.last_sw == 'l':
                    self.info_left_foot_pos.append(
                        self.env.GetBodyPosition("TalusL"))

            if last_step_count == 7 and self.walk_fsm.step_count == 8:
                print(self.env.GetBodyPosition("TalusL")[2])
                print(self.env.GetBodyPosition("TalusR")[2])
                self.info_end_time = self.env.GetSimulationTime()
                self.info_root_pos.append(self.env.GetBodyPosition("Pelvis"))
                self.info_root_pos[1][1] = 0.
                if self.walk_fsm.last_sw == 'r':
                    self.info_right_foot_pos.append(
                        self.env.GetBodyPosition("TalusR"))
                elif self.walk_fsm.last_sw == 'l':
                    self.info_left_foot_pos.append(
                        self.env.GetBodyPosition("TalusL"))

                self.walking_dir = self.info_root_pos[1] - self.info_root_pos[0]
                self.walking_dir[1] = 0.
                self.walking_dir /= np.linalg.norm(self.walking_dir)

                self.push_start_time = self.env.GetSimulationTime() + (
                    self.push_start_timing /
                    100.) * self.env.GetMotionHalfCycleDuration()
                self.push_end_time = self.push_start_time + self.push_duration
                print("push at ", self.push_start_time)

            if self.env.GetSimulationTime() >= self.push_start_time:
                root_pos_plane = self.env.GetBodyPosition("Pelvis")
                root_pos_plane[1] = 0.
                detour_length = calculate_distance_to_line(
                    root_pos_plane, self.walking_dir, self.info_root_pos[0])
                if self.max_detour_length < detour_length:
                    self.max_detour_length = detour_length
                    self.max_detour_step_count = self.walk_fsm.step_count

            if self.env.GetSimulationTime() >= self.push_start_time + 10.:
                break

            if self.env.GetBodyPosition("Pelvis")[1] < 0.3:
                print("fallen at ", self.walk_fsm.step_count,
                      self.env.GetSimulationTime(), 's')
                self.valid = False
                break

            # print(self.env.GetBodyPosition("Pelvis"))
            # print(self.walk_fsm.step_count)

            self.step()
        print('end!', self.valid)

    def setParamedStepParams(self, crouch_angle, step_length_ratio,
                             walk_speed_ratio):
        self.step_length_ratio = step_length_ratio
        self.walk_speed_ratio = walk_speed_ratio
        self.duration_ratio = step_length_ratio / walk_speed_ratio
        self.env.SetWalkingParams(int(crouch_angle), step_length_ratio,
                                  walk_speed_ratio)

    def setPushParams(self, push_step, push_duration, push_force,
                      push_start_timing):
        self.push_step = push_step
        self.push_duration = push_duration
        self.push_force = push_force / 2.
        self.push_start_timing = push_start_timing
        self.env.SetPushParams(push_step, push_duration, push_force,
                               push_start_timing)

    def getPushedLength(self):
        return self.max_detour_length

    def getPushedStep(self):
        return self.max_detour_step_count

    def getStepLength(self):
        sum_stride_length = 0.
        stride_info_num = 0
        for info_foot_pos in [
                self.info_right_foot_pos, self.info_left_foot_pos
        ]:
            for i in range(len(info_foot_pos) - 1):
                stride_vec = info_foot_pos[i + 1] - info_foot_pos[i]
                stride_vec[1] = 0.
                sum_stride_length += np.linalg.norm(stride_vec)
                stride_info_num += 1

        return sum_stride_length / stride_info_num

    def getWalkingSpeed(self):
        walking_vec = self.info_root_pos[1] - self.info_root_pos[0]
        walking_vec[1] = 0.
        distance = np.linalg.norm(walking_vec)
        return distance / (self.info_end_time - self.info_start_time)

    def getStartTimingFootIC(self):
        return 0.

    def getMidTimingFootIC(self):
        return 0.

    def getStartTimingTimeFL(self):
        return 0.

    def getMidTimingTimeFL(self):
        return 0.

    def getStartTimingFootFL(self):
        return 0.

    def getMidTimingFootFL(self):
        return 0.
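
Finally, a minimal sketch of running a push-recovery trial end to end with this class (directories and parameter values are placeholders):

# Hypothetical evaluation run (sketch only); paths are placeholders.
params = (True, True, False, True, True, '0')   # muscle, pushed, single-seg foot, variance, normal, crouch 0
sim = PushSim(params, './data/metadata/', nn_finding_dir='./nn/')

sim.setParamedStepParams(crouch_angle=0, step_length_ratio=1.0, walk_speed_ratio=1.0)
sim.setPushParams(push_step=8, push_duration=0.2, push_force=100., push_start_timing=50.)
sim.reset(rsi=False)
sim.simulate()

if sim.valid:
    print('max detour length:', sim.getPushedLength())
    print('mean step length :', sim.getStepLength())
    print('walking speed    :', sim.getWalkingSpeed())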