Example #1
import numpy as np
from envs.target import VTgtField  # import path taken from the environment examples below

version = 3
# 0: constant forward velocities
# 1: consecutive sinks forward for walking
# 2: consecutive sinks for walking (-90 < th < 90) (Round 1)
# 3: consecutive sinks for walking (-180 < th < 180) (Round 2)

dt = .01
#dt = .5
pose_agent = np.array([0, 0, 0])  # [x, y, theta]

vtgt_v1 = VTgtField(version=version, dt=dt)
vtgt_v1.reset(version=version, seed=0)

t_sim = 25
x = 0
y = 0
th = 0
pose_t = np.array([[x], [y], [th]])

for t in np.arange(0, t_sim, dt):
    pose = np.array([x, y, th])  # [x, y, theta]
    vtgt_field_local, flag_new_target = vtgt_v1.update(pose)

    vtgt = vtgt_v1.get_vtgt(pose[0:2])

    x += float(vtgt[0]) * dt  # np.asscalar was removed from NumPy; float() / .item() is the drop-in
    y += float(vtgt[1]) * dt

    if flag_new_target:
        print('Target achieved at {} sec'.format(t))
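
The field returned by `update` can be sanity-checked visually. The sketch below is not part of the original example; it assumes `vtgt_field_local` has shape (2, 11, 11), matching the `v_tgt_field` used by the environment in the examples that follow.

import matplotlib.pyplot as plt

# Hypothetical visualization: quiver plot of the last local target velocity field,
# assuming a (2, 11, 11) layout of (vx, vy) components over an 11x11 grid.
vx, vy = vtgt_field_local[0], vtgt_field_local[1]
grid = np.arange(vx.shape[0])
X, Y = np.meshgrid(grid, grid, indexing='ij')
plt.quiver(X, Y, vx, vy)
plt.title('local target velocity field at t = {:.2f} s'.format(t))
plt.show()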
Example #2
class L2M2019Env(OsimEnv):
    # to change later:
    # muscle v: normalize by max_contraction_velocity, 15 lopt / s
    model = '3D'

    # from gait14dof22musc_20170320.osim
    MASS = 75.16460000000001  # 11.777 + 2*(9.3014 + 3.7075 + 0.1 + 1.25 + 0.2166) + 34.2366
    G = 9.80665  # from gait1dof22muscle

    LENGTH0 = 1  # leg length

    footstep = {}
    footstep['n'] = 0
    footstep['new'] = False
    footstep['r_contact'] = 0
    footstep['l_contact'] = 0

    dict_muscle = {
        'abd': 'HAB',
        'add': 'HAD',
        'iliopsoas': 'HFL',
        'glut_max': 'GLU',
        'hamstrings': 'HAM',
        'rect_fem': 'RF',
        'vasti': 'VAS',
        'bifemsh': 'BFSH',
        'gastroc': 'GAS',
        'soleus': 'SOL',
        'tib_ant': 'TA'
    }

    act2mus = [
        0, 1, 4, 7, 3, 2, 5, 6, 8, 9, 10, 11, 12, 15, 18, 14, 13, 16, 17, 19,
        20, 21
    ]
    # maps muscle order in action to muscle order in gait14dof22musc_20170320.osim
    # muscle order in action
    #    HAB, HAD, HFL, GLU, HAM, RF, VAS, BFSH, GAS, SOL, TA
    # muscle order in gait14dof22musc_20170320.osim
    #    HAB, HAD, HAM, BFSH, GLU, HFL, RF, VAS, GAS, SOL, TA
    #    or abd, add, hamstrings, bifemsh, glut_max, iliopsoas, rect_fem, vasti, gastroc, soleus, tib_ant

    INIT_POSE = np.array([
        0,  # forward speed
        0,  # rightward speed
        0.94,  # pelvis height
        0 * np.pi / 180,  # trunk lean
        0 * np.pi / 180,  # [right] hip adduct
        0 * np.pi / 180,  # hip flex
        0 * np.pi / 180,  # knee extend
        0 * np.pi / 180,  # ankle flex
        0 * np.pi / 180,  # [left] hip adduct
        0 * np.pi / 180,  # hip flex
        0 * np.pi / 180,  # knee extend
        0 * np.pi / 180
    ])  # ankle flex

    obs_vtgt_space = np.array([[-10] * 2 * 11 * 11, [10] * 2 * 11 * 11])

    obs_body_space = np.array([[-1.0] * 97, [1.0] * 97])
    obs_body_space[:, 0] = [0, 3]  # pelvis height
    obs_body_space[:, 1] = [-np.pi, np.pi]  # pelvis pitch
    obs_body_space[:, 2] = [-np.pi, np.pi]  # pelvis roll
    obs_body_space[:, 3] = [-20, 20]  # pelvis vel (forward)
    obs_body_space[:, 4] = [-20, 20]  # pelvis vel (leftward)
    obs_body_space[:, 5] = [-20, 20]  # pelvis vel (upward)
    obs_body_space[:, 6] = [-10 * np.pi,
                            10 * np.pi]  # pelvis angular vel (pitch)
    obs_body_space[:, 7] = [-10 * np.pi,
                            10 * np.pi]  # pelvis angular vel (roll)
    obs_body_space[:, 8] = [-10 * np.pi,
                            10 * np.pi]  # pelvis angular vel (yaw)
    obs_body_space[:, [9 + x for x in [0, 44]]] = np.array(
        [[-5, 5]]).transpose(
        )  # (r,l) ground reaction force normalized to bodyweight (forward)
    obs_body_space[:, [10 + x for x in [0, 44]]] = np.array(
        [[-5, 5]]).transpose(
        )  # (r, l) ground reaction force normalized to bodyweight (rightward)
    obs_body_space[:, [11 + x for x in [0, 44]]] = np.array(
        [[-10, 10]]).transpose(
        )  # (r, l) ground reaction force normalized to bodyweight (upward)
    obs_body_space[:, [12 + x for x in [0, 44]]] = np.array(
        [[-45 * np.pi / 180,
          90 * np.pi / 180]]).transpose()  # (r, l) joint: (+) hip abduction
    obs_body_space[:, [13 + x for x in [0, 44]]] = np.array(
        [[-180 * np.pi / 180,
          45 * np.pi / 180]]).transpose()  # (r, l) joint: (+) hip extension
    obs_body_space[:, [14 + x for x in [0, 44]]] = np.array(
        [[-180 * np.pi / 180,
          0]]).transpose()  # (r, l) joint: (+) knee extension
    obs_body_space[:, [15 + x for x in [0, 44]]] = np.array([[
        -45 * np.pi / 180, 90 * np.pi / 180
    ]]).transpose()  # (r, l) joint: (+) ankle extension (plantarflexion)
    obs_body_space[:, [16 + x for x in [0, 44]]] = np.array(
        [[-5 * np.pi,
          5 * np.pi]]).transpose()  # (r, l) joint: (+) hip abduction
    obs_body_space[:, [17 + x for x in [0, 44]]] = np.array(
        [[-5 * np.pi,
          5 * np.pi]]).transpose()  # (r, l) joint: (+) hip extension
    obs_body_space[:, [18 + x for x in [0, 44]]] = np.array(
        [[-5 * np.pi,
          5 * np.pi]]).transpose()  # (r, l) joint: (+) knee extension
    obs_body_space[:, [19 + x for x in [0, 44]]] = np.array([[
        -5 * np.pi, 5 * np.pi
    ]]).transpose()  # (r, l) joint: (+) ankle extension (plantarflexion)
    obs_body_space[:, [
        20 + x for x in list(range(0, 33, 3)) + list(range(44, 77, 3))
    ]] = np.array([[0, 2]]).transpose(
    )  # (r, l) muscle forces, normalized to maximum isometric force
    obs_body_space[:, [
        21 + x for x in list(range(0, 33, 3)) + list(range(44, 77, 3))
    ]] = np.array([[
        0, 2
    ]]).transpose()  # (r, l) muscle lengths, normalized to optimal length
    obs_body_space[:, [
        22 + x for x in list(range(0, 33, 3)) + list(range(44, 77, 3))
    ]] = np.array([[-20, 20]]).transpose(
    )  # (r, l) muscle velocities, normalized to optimal length per second

    def get_model_key(self):
        return self.model

    def set_difficulty(self, difficulty):
        self.difficulty = difficulty
        if difficulty == 0:
            self.time_limit = 1000
        if difficulty == 1:
            self.time_limit = 1000
        if difficulty == 2:
            self.time_limit = 1000
        self.spec.timestep_limit = self.time_limit

    def __init__(self,
                 visualize=True,
                 integrator_accuracy=5e-5,
                 difficulty=2,
                 seed=0,
                 report=None):
        if difficulty not in [0, 1, 2]:
            raise ValueError("difficulty level should be in [0, 1, 2].")
        self.model_paths = {}
        self.model_paths['3D'] = os.path.join(
            os.path.dirname(__file__),
            '../models/gait14dof22musc_20170320.osim')
        self.model_paths['2D'] = os.path.join(
            os.path.dirname(__file__),
            '../models/gait14dof22musc_planar_20170320.osim')
        self.model_path = self.model_paths[self.get_model_key()]
        super(L2M2019Env,
              self).__init__(visualize=visualize,
                             integrator_accuracy=integrator_accuracy)

        self.Fmax = {}
        self.lopt = {}
        for leg, side in zip(['r_leg', 'l_leg'], ['r', 'l']):
            self.Fmax[leg] = {}
            self.lopt[leg] = {}
            for MUS, mus in zip([
                    'HAB', 'HAD', 'HFL', 'GLU', 'HAM', 'RF', 'VAS', 'BFSH',
                    'GAS', 'SOL', 'TA'
            ], [
                    'abd', 'add', 'iliopsoas', 'glut_max', 'hamstrings',
                    'rect_fem', 'vasti', 'bifemsh', 'gastroc', 'soleus',
                    'tib_ant'
            ]):
                muscle = self.osim_model.muscleSet.get('{}_{}'.format(
                    mus, side))
                Fmax = muscle.getMaxIsometricForce()
                lopt = muscle.getOptimalFiberLength()
                self.Fmax[leg][MUS] = muscle.getMaxIsometricForce()
                self.lopt[leg][MUS] = muscle.getOptimalFiberLength()

        self.set_difficulty(difficulty)

        if report:
            bufsize = 0
            self.observations_file = open('%s-obs.csv' % (report, ), 'w',
                                          bufsize)
            self.actions_file = open('%s-act.csv' % (report, ), 'w', bufsize)
            self.get_headers()

        # create target velocity field
        from envs.target import VTgtField
        self.vtgt = VTgtField(visualize=visualize,
                              version=self.difficulty,
                              dt=self.osim_model.stepsize)
        self.obs_vtgt_space = self.vtgt.vtgt_space

    def reset(self, project=True, seed=None, init_pose=None, obs_as_dict=True):
        self.t = 0
        self.init_reward()
        self.vtgt.reset(version=self.difficulty, seed=seed)

        # initialize state
        self.osim_model.state = self.osim_model.model.initializeState()
        if init_pose is None:
            init_pose = self.INIT_POSE
        state = self.osim_model.get_state()
        QQ = state.getQ()
        QQDot = state.getQDot()
        for i in range(17):
            QQDot[i] = 0
        QQ[3] = 0  # x: (+) forward
        QQ[5] = 0  # z: (+) right
        QQ[1] = 0 * np.pi / 180  # roll
        QQ[2] = 0 * np.pi / 180  # yaw
        QQDot[3] = init_pose[0]  # forward speed
        QQDot[5] = init_pose[1]  # rightward speed
        QQ[4] = init_pose[2]  # pelvis height
        QQ[0] = -init_pose[3]  # trunk lean: (+) backward
        QQ[7] = -init_pose[4]  # right hip abduct
        QQ[6] = -init_pose[5]  # right hip flex
        QQ[13] = init_pose[6]  # right knee extend
        QQ[15] = -init_pose[7]  # right ankle flex
        QQ[10] = -init_pose[8]  # left hip adduct
        QQ[9] = -init_pose[9]  # left hip flex
        QQ[14] = init_pose[10]  # left knee extend
        QQ[16] = -init_pose[11]  # left ankle flex

        state.setQ(QQ)
        state.setU(QQDot)
        self.osim_model.set_state(state)
        self.osim_model.model.equilibrateMuscles(self.osim_model.state)

        self.osim_model.state.setTime(0)
        self.osim_model.istep = 0

        self.osim_model.reset_manager()

        d = super(L2M2019Env, self).get_state_desc()
        pose = np.array([
            d['body_pos']['pelvis'][0], -d['body_pos']['pelvis'][2],
            d['joint_pos']['ground_pelvis'][2]
        ])
        self.v_tgt_field, self.flag_new_v_tgt_field = self.vtgt.update(pose)

        if not project:
            return self.get_state_desc()
        if obs_as_dict:
            return self.get_observation_dict()
        return self.get_observation()

    def load_model(self, model_path=None):
        super(L2M2019Env, self).load_model(model_path)
        observation_space = np.concatenate(
            (self.obs_vtgt_space, self.obs_body_space), axis=1)
        self.observation_space = convert_to_gym(observation_space)

    def step(self, action, project=True, obs_as_dict=False):
        action_mapped = [action[i] for i in self.act2mus]
        observation, reward, done, info = super(L2M2019Env, self).step(
            action_mapped, project=project, obs_as_dict=obs_as_dict)
        self.t += self.osim_model.stepsize
        self.update_footstep()

        d = super(L2M2019Env, self).get_state_desc()
        self.pose = np.array([
            d['body_pos']['pelvis'][0], -d['body_pos']['pelvis'][2],
            d['joint_pos']['ground_pelvis'][2]
        ])
        self.v_tgt_field, self.flag_new_v_tgt_field = self.vtgt.update(
            self.pose)

        return observation, reward, done, info

    def change_model(self, model='3D', difficulty=2, seed=0):
        if self.model != model:
            self.model = model
            self.load_model(self.model_paths[self.get_model_key()])
        self.set_difficulty(difficulty)

    def is_done(self):
        state_desc = self.get_state_desc()
        return state_desc['body_pos']['pelvis'][1] < 0.6

    def update_footstep(self):
        state_desc = self.get_state_desc()

        # update contact
        r_contact = True if state_desc['forces']['foot_r'][1] < -0.05 * (
            self.MASS * self.G) else False
        l_contact = True if state_desc['forces']['foot_l'][1] < -0.05 * (
            self.MASS * self.G) else False

        self.footstep['new'] = False
        if (not self.footstep['r_contact']
                and r_contact) or (not self.footstep['l_contact']
                                   and l_contact):
            self.footstep['new'] = True
            self.footstep['n'] += 1

        self.footstep['r_contact'] = r_contact
        self.footstep['l_contact'] = l_contact

    def get_observation_dict(self):
        state_desc = self.get_state_desc()

        obs_dict = {}

        obs_dict['v_tgt_field'] = state_desc['v_tgt_field']

        # pelvis state (in local frame)
        obs_dict['pelvis'] = {}
        obs_dict['pelvis']['height'] = state_desc['body_pos']['pelvis'][1]
        obs_dict['pelvis']['pitch'] = -state_desc['joint_pos'][
            'ground_pelvis'][0]  # (+) pitching forward
        obs_dict['pelvis']['roll'] = state_desc['joint_pos']['ground_pelvis'][
            1]  # (+) rolling around the forward axis (to the right)
        yaw = state_desc['joint_pos']['ground_pelvis'][2]
        dx_local, dy_local = rotate_frame(state_desc['body_vel']['pelvis'][0],
                                          state_desc['body_vel']['pelvis'][2],
                                          yaw)
        dz_local = state_desc['body_vel']['pelvis'][1]
        obs_dict['pelvis']['vel'] = [
            dx_local,  # (+) forward
            -dy_local,  # (+) leftward
            dz_local,  # (+) upward
            -state_desc['joint_vel']['ground_pelvis']
            [0],  # (+) pitch angular velocity
            state_desc['joint_vel']['ground_pelvis']
            [1],  # (+) roll angular velocity
            state_desc['joint_vel']['ground_pelvis'][2]
        ]  # (+) yaw angular velocity

        # leg state
        for leg, side in zip(['r_leg', 'l_leg'], ['r', 'l']):
            obs_dict[leg] = {}
            grf = [
                f / (self.MASS * self.G)
                for f in state_desc['forces']['foot_{}'.format(side)][0:3]
            ]  # forces normalized by bodyweight
            grm = [
                m / (self.MASS * self.G)
                for m in state_desc['forces']['foot_{}'.format(side)][3:6]
            ]  # moments normalized by bodyweight
            grfx_local, grfy_local = rotate_frame(-grf[0], -grf[2], yaw)
            if leg == 'r_leg':
                obs_dict[leg]['ground_reaction_forces'] = [
                    grfx_local,  # (+) forward
                    grfy_local,  # (+) lateral (rightward)
                    -grf[1]
                ]  # (+) upward
            if leg == 'l_leg':
                obs_dict[leg]['ground_reaction_forces'] = [
                    grfx_local,  # (+) forward
                    -grfy_local,  # (+) lateral (leftward)
                    -grf[1]
                ]  # (+) upward

            # joint angles
            obs_dict[leg]['joint'] = {}
            obs_dict[leg]['joint']['hip_abd'] = -state_desc['joint_pos'][
                'hip_{}'.format(side)][1]  # (+) hip abduction
            obs_dict[leg]['joint']['hip'] = -state_desc['joint_pos'][
                'hip_{}'.format(side)][0]  # (+) extension
            obs_dict[leg]['joint']['knee'] = state_desc['joint_pos'][
                'knee_{}'.format(side)][0]  # (+) extension
            obs_dict[leg]['joint']['ankle'] = -state_desc['joint_pos'][
                'ankle_{}'.format(side)][0]  # (+) extension
            # joint angular velocities
            obs_dict[leg]['d_joint'] = {}
            obs_dict[leg]['d_joint']['hip_abd'] = -state_desc['joint_vel'][
                'hip_{}'.format(side)][1]  # (+) hip abduction
            obs_dict[leg]['d_joint']['hip'] = -state_desc['joint_vel'][
                'hip_{}'.format(side)][0]  # (+) extension
            obs_dict[leg]['d_joint']['knee'] = state_desc['joint_vel'][
                'knee_{}'.format(side)][0]  # (+) extension
            obs_dict[leg]['d_joint']['ankle'] = -state_desc['joint_vel'][
                'ankle_{}'.format(side)][0]  # (+) extension

            # muscles
            for MUS, mus in zip([
                    'HAB', 'HAD', 'HFL', 'GLU', 'HAM', 'RF', 'VAS', 'BFSH',
                    'GAS', 'SOL', 'TA'
            ], [
                    'abd', 'add', 'iliopsoas', 'glut_max', 'hamstrings',
                    'rect_fem', 'vasti', 'bifemsh', 'gastroc', 'soleus',
                    'tib_ant'
            ]):
                obs_dict[leg][MUS] = {}
                obs_dict[leg][MUS]['f'] = state_desc['muscles']['{}_{}'.format(
                    mus, side)]['fiber_force'] / self.Fmax[leg][MUS]
                obs_dict[leg][MUS]['l'] = state_desc['muscles']['{}_{}'.format(
                    mus, side)]['fiber_length'] / self.lopt[leg][MUS]
                obs_dict[leg][MUS]['v'] = state_desc['muscles']['{}_{}'.format(
                    mus, side)]['fiber_velocity'] / self.lopt[leg][MUS]

        return obs_dict

    ## Values in the observation vector
    # 'vtgt_field': vtgt vectors in body frame (2*11*11 = 242 values)
    # 'pelvis': height, pitch, roll, 6 vel (9 values)
    # for each 'r_leg' and 'l_leg' (*2)
    #   'ground_reaction_forces' (3 values)
    #   'joint' (4 values)
    #   'd_joint' (4 values)
    #   for each of the eleven muscles (*11)
    #       normalized 'f', 'l', 'v' (3 values)
    # 242 + 9 + 2*(3 + 4 + 4 + 11*3) = 339
    def get_observation(self):
        obs_dict = self.get_observation_dict()

        # Augmented environment from the L2R challenge
        res = []

        # target velocity field (in body frame)
        v_tgt = np.ndarray.flatten(obs_dict['v_tgt_field'])
        res += v_tgt.tolist()

        res.append(obs_dict['pelvis']['height'])
        res.append(obs_dict['pelvis']['pitch'])
        res.append(obs_dict['pelvis']['roll'])
        res.append(obs_dict['pelvis']['vel'][0] / self.LENGTH0)
        res.append(obs_dict['pelvis']['vel'][1] / self.LENGTH0)
        res.append(obs_dict['pelvis']['vel'][2] / self.LENGTH0)
        res.append(obs_dict['pelvis']['vel'][3])
        res.append(obs_dict['pelvis']['vel'][4])
        res.append(obs_dict['pelvis']['vel'][5])

        for leg in ['r_leg', 'l_leg']:
            res += obs_dict[leg]['ground_reaction_forces']
            res.append(obs_dict[leg]['joint']['hip_abd'])
            res.append(obs_dict[leg]['joint']['hip'])
            res.append(obs_dict[leg]['joint']['knee'])
            res.append(obs_dict[leg]['joint']['ankle'])
            res.append(obs_dict[leg]['d_joint']['hip_abd'])
            res.append(obs_dict[leg]['d_joint']['hip'])
            res.append(obs_dict[leg]['d_joint']['knee'])
            res.append(obs_dict[leg]['d_joint']['ankle'])
            for MUS in [
                    'HAB', 'HAD', 'HFL', 'GLU', 'HAM', 'RF', 'VAS', 'BFSH',
                    'GAS', 'SOL', 'TA'
            ]:
                res.append(obs_dict[leg][MUS]['f'])
                res.append(obs_dict[leg][MUS]['l'])
                res.append(obs_dict[leg][MUS]['v'])
        return res

    def get_observation_space_size(self):
        return 339

    def get_state_desc(self):
        d = super(L2M2019Env, self).get_state_desc()
        #state_desc['joint_pos']
        #state_desc['joint_vel']
        #state_desc['joint_acc']
        #state_desc['body_pos']
        #state_desc['body_vel']
        #state_desc['body_acc']
        #state_desc['body_pos_rot']
        #state_desc['body_vel_rot']
        #state_desc['body_acc_rot']
        #state_desc['forces']
        #state_desc['muscles']
        #state_desc['markers']
        #state_desc['misc']
        if self.difficulty in [0, 1, 2]:
            d['v_tgt_field'] = self.v_tgt_field  # shape: (2, 11, 11)
        else:
            raise ValueError("difficulty level should be in [0, 1, 2].")
        return d

    def init_reward(self):
        self.init_reward_1()

    def init_reward_1(self):
        self.d_reward = {}

        self.d_reward['weight'] = {}
        self.d_reward['weight']['footstep'] = 10
        self.d_reward['weight']['effort'] = 1
        self.d_reward['weight']['v_tgt'] = 1

        self.d_reward['alive'] = 0.1
        self.d_reward['effort'] = 0

        self.d_reward['footstep'] = {}
        self.d_reward['footstep']['effort'] = 0
        self.d_reward['footstep']['del_t'] = 0
        self.d_reward['footstep']['del_v'] = 0

    def get_reward(self):
        return self.get_reward_1()

    def get_reward_1(self):
        state_desc = self.get_state_desc()
        if not self.get_prev_state_desc():
            return 0

        reward = 0
        dt = self.osim_model.stepsize

        # alive reward
        # should be large enough to search for 'success' solutions (alive to the end) first
        reward += self.d_reward['alive']

        # effort ~ muscle fatigue ~ (muscle activation)^2
        ACT2 = 0
        for muscle in sorted(state_desc['muscles'].keys()):
            ACT2 += np.square(state_desc['muscles'][muscle]['activation'])
        self.d_reward['effort'] += ACT2 * dt
        self.d_reward['footstep']['effort'] += ACT2 * dt

        self.d_reward['footstep']['del_t'] += dt

        # reward from velocity (penalize from deviating from v_tgt)

        p_body = [
            state_desc['body_pos']['pelvis'][0],
            -state_desc['body_pos']['pelvis'][2]
        ]
        v_body = [
            state_desc['body_vel']['pelvis'][0],
            -state_desc['body_vel']['pelvis'][2]
        ]
        v_tgt = self.vtgt.get_vtgt(p_body).T

        self.d_reward['footstep']['del_v'] += (v_body - v_tgt) * dt

        # footstep reward (when made a new step)
        if self.footstep['new']:
            # footstep reward: so that solution does not avoid making footsteps
            # scaled by del_t, so that solution does not get higher rewards by making unnecessary (small) steps
            reward_footstep_0 = self.d_reward['weight'][
                'footstep'] * self.d_reward['footstep']['del_t']

            # deviation from target velocity
            # the average velocity a step (instead of instantaneous velocity) is used
            # as velocity fluctuates within a step in normal human walking
            #reward_footstep_v = -self.reward_w['v_tgt']*(self.footstep['del_vx']**2)
            reward_footstep_v = -self.d_reward['weight'][
                'v_tgt'] * np.linalg.norm(
                    self.d_reward['footstep']['del_v']) / self.LENGTH0

            # penalize effort
            reward_footstep_e = -self.d_reward['weight'][
                'effort'] * self.d_reward['footstep']['effort']

            self.d_reward['footstep']['del_t'] = 0
            self.d_reward['footstep']['del_v'] = 0
            self.d_reward['footstep']['effort'] = 0

            reward += reward_footstep_0 + reward_footstep_v + reward_footstep_e

        # success bonus
        if not self.is_done() and (self.osim_model.istep >=
                                   self.spec.timestep_limit
                                   ):  #and self.failure_mode is 'success':
            # retrieve reward (i.e. do not penalize for the simulation terminating in the middle of a step)
            reward_footstep_0 = self.d_reward['weight'][
                'footstep'] * self.d_reward['footstep']['del_t']
            reward += reward_footstep_0 + 100

        return reward
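
As a usage note, a minimal rollout of the environment above could look like the following sketch. It is not taken from the source; it assumes muscle activations in [0, 1] and uses the 22-element action order documented in act2mus (right-leg HAB, HAD, HFL, GLU, HAM, RF, VAS, BFSH, GAS, SOL, TA, then the same for the left leg).

# Minimal rollout sketch (assumptions noted above): constant zero activations.
env = L2M2019Env(visualize=False, difficulty=2, seed=0)
obs_dict = env.reset(seed=0, obs_as_dict=True)
for _ in range(10):
    action = [0.0] * 22                      # one activation per muscle, assumed range [0, 1]
    obs, reward, done, info = env.step(action, obs_as_dict=False)
    if done:
        break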
Example #3
class L2M2019Env(OsimEnv):
    # to change later:
    # muscle v: normalize by max_contraction_velocity, 15 lopt / s
    model = '3D'

    # from gait14dof22musc_20170320.osim
    MASS = 75.16460000000001 # 11.777 + 2*(9.3014 + 3.7075 + 0.1 + 1.25 + 0.2166) + 34.2366
    G = 9.80665 # from gait1dof22muscle

    LENGTH0 = 1 # leg length

    footstep = {}
    footstep['n'] = 0
    footstep['new'] = False
    footstep['r_contact'] = 1
    footstep['l_contact'] = 1

    dict_muscle = { 'abd': 'HAB',
                    'add': 'HAD',
                    'iliopsoas': 'HFL',
                    'glut_max': 'GLU',
                    'hamstrings': 'HAM',
                    'rect_fem': 'RF',
                    'vasti': 'VAS',
                    'bifemsh': 'BFSH',
                    'gastroc': 'GAS',
                    'soleus': 'SOL',
                    'tib_ant': 'TA'}

    act2mus = [0, 1, 4, 7, 3, 2, 5, 6, 8, 9, 10, 11, 12, 15, 18, 14, 13, 16, 17, 19, 20, 21]
    # maps muscle order in action to muscle order in gait14dof22musc_20170320.osim
    # muscle order in action
    #    HAB, HAD, HFL, GLU, HAM, RF, VAS, BFSH, GAS, SOL, TA
    # muscle order in gait14dof22musc_20170320.osim
    #    HAB, HAD, HAM, BFSH, GLU, HFL, RF, VAS, GAS, SOL, TA
    #    or abd, add, hamstrings, bifemsh, glut_max, iliopsoas, rect_fem, vasti, gastroc, soleus, tib_ant

    INIT_POSE = np.array([
        0, # forward speed
        0, # rightward speed
        0.94, # pelvis height
        0*np.pi/180, # trunk lean
        0*np.pi/180, # [right] hip adduct
        0*np.pi/180, # hip flex
        0*np.pi/180, # knee extend
        0*np.pi/180, # ankle flex
        0*np.pi/180, # [left] hip adduct
        0*np.pi/180, # hip flex
        0*np.pi/180, # knee extend
        0*np.pi/180]) # ankle flex

    obs_vtgt_space = np.array([[-10] * 2*11*11, [10] * 2*11*11])

    obs_body_space = np.array([[-1.0] * 97, [1.0] * 97])
    obs_body_space[:,0] = [0, 3] # pelvis height
    obs_body_space[:,1] = [-np.pi, np.pi] # pelvis pitch
    obs_body_space[:,2] = [-np.pi, np.pi] # pelvis roll
    obs_body_space[:,3] = [-20, 20] # pelvis vel (forward)
    obs_body_space[:,4] = [-20, 20] # pelvis vel (leftward)
    obs_body_space[:,5] = [-20, 20] # pelvis vel (upward)
    obs_body_space[:,6] = [-10*np.pi, 10*np.pi] # pelvis angular vel (pitch)
    obs_body_space[:,7] = [-10*np.pi, 10*np.pi] # pelvis angular vel (roll)
    obs_body_space[:,8] = [-10*np.pi, 10*np.pi] # pelvis angular vel (yaw)
    obs_body_space[:,[9 + x for x in [0, 44]]] = np.array([[-5, 5]]).transpose() # (r,l) ground reaction force normalized to bodyweight (forward)
    obs_body_space[:,[10 + x for x in [0, 44]]] = np.array([[-5, 5]]).transpose() # (r, l) ground reaction force normalized to bodyweight (rightward)
    obs_body_space[:,[11 + x for x in [0, 44]]] = np.array([[-10, 10]]).transpose() # (r, l) ground reaction force normalized to bodyweight (upward)
    obs_body_space[:,[12 + x for x in [0, 44]]] = np.array([[-45*np.pi/180, 90*np.pi/180]]).transpose() # (r, l) joint: (+) hip abduction
    obs_body_space[:,[13 + x for x in [0, 44]]] = np.array([[-180*np.pi/180, 45*np.pi/180]]).transpose() # (r, l) joint: (+) hip extension
    obs_body_space[:,[14 + x for x in [0, 44]]] = np.array([[-180*np.pi/180, 15*np.pi/180]]).transpose() # (r, l) joint: (+) knee extension
    obs_body_space[:,[15 + x for x in [0, 44]]] = np.array([[-45*np.pi/180, 90*np.pi/180]]).transpose() # (r, l) joint: (+) ankle extension (plantarflexion)
    obs_body_space[:,[16 + x for x in [0, 44]]] = np.array([[-5*np.pi, 5*np.pi]]).transpose() # (r, l) joint: (+) hip abduction
    obs_body_space[:,[17 + x for x in [0, 44]]] = np.array([[-5*np.pi, 5*np.pi]]).transpose() # (r, l) joint: (+) hip extension
    obs_body_space[:,[18 + x for x in [0, 44]]] = np.array([[-5*np.pi, 5*np.pi]]).transpose() # (r, l) joint: (+) knee extension
    obs_body_space[:,[19 + x for x in [0, 44]]] = np.array([[-5*np.pi, 5*np.pi]]).transpose() # (r, l) joint: (+) ankle extension (plantarflexion)
    obs_body_space[:,[20 + x for x in list(range(0, 33, 3)) + list(range(44, 77, 3))]] = np.array([[0, 3]]).transpose() # (r, l) muscle forces, normalized to maximum isometric force
    obs_body_space[:,[21 + x for x in list(range(0, 33, 3)) + list(range(44, 77, 3))]] = np.array([[0, 3]]).transpose() # (r, l) muscle lengths, normalized to optimal length
    obs_body_space[:,[22 + x for x in list(range(0, 33, 3)) + list(range(44, 77, 3))]] = np.array([[-50, 50]]).transpose() # (r, l) muscle velocities, normalized to optimal length per second

    def get_model_key(self):
        return self.model

    def set_difficulty(self, difficulty):
        self.difficulty = difficulty
        if difficulty == 0:
            self.time_limit = 1000
        if difficulty == 1:
            self.time_limit = 1000
        if difficulty == 2:
            self.time_limit = 1000
            print("difficulty 2 for Round 1")
        if difficulty == 3:
            self.time_limit = 2500 # 25 sec
            print("difficulty 3 for Round 2")
        self.spec.timestep_limit = self.time_limit

    def __init__(self, visualize=True, integrator_accuracy=5e-8, integrator_MaxStepSize=1e-1, difficulty=3, seed=None, report=None, Perturbtime = 50):  #Shakiba
        if difficulty not in [0, 1, 2, 3]:
            raise ValueError("difficulty level should be in [0, 1, 2, 3].")
        self.model_paths = {}
        self.model_paths['3D'] = os.path.join(os.path.dirname(__file__), '../models/gait14dof22musc_20170320.osim')
        self.model_paths['2D'] = os.path.join(os.path.dirname(__file__), '../models/gait14dof22musc_planar_20170320.osim')
        self.model_path = self.model_paths[self.get_model_key()]
        super(L2M2019Env, self).__init__(visualize=visualize, integrator_accuracy=integrator_accuracy, integrator_MaxStepSize= integrator_MaxStepSize, Perturbtime = Perturbtime)  #Shakiba

        self.Fmax = {}
        self.lopt = {}
        for leg, side in zip(['r_leg', 'l_leg'], ['r', 'l']):
            self.Fmax[leg] = {}
            self.lopt[leg] = {}
            for MUS, mus in zip(    ['HAB', 'HAD', 'HFL', 'GLU', 'HAM', 'RF', 'VAS', 'BFSH', 'GAS', 'SOL', 'TA'],
                                    ['abd', 'add', 'iliopsoas', 'glut_max', 'hamstrings', 'rect_fem', 'vasti', 'bifemsh', 'gastroc', 'soleus', 'tib_ant']):
                muscle = self.osim_model.muscleSet.get('{}_{}'.format(mus,side))
                Fmax = muscle.getMaxIsometricForce()
                lopt = muscle.getOptimalFiberLength()
                self.Fmax[leg][MUS] = muscle.getMaxIsometricForce()
                self.lopt[leg][MUS] = muscle.getOptimalFiberLength()

        self.set_difficulty(difficulty)

        if report:
            bufsize = 0
            self.observations_file = open('%s-obs.csv' % (report,),'w', bufsize)
            self.actions_file = open('%s-act.csv' % (report,),'w', bufsize)
            self.get_headers()

        # create target velocity field
        from envs.target import VTgtField
        self.vtgt = VTgtField(visualize=visualize, version=self.difficulty, dt=self.osim_model.stepsize, seed=seed)
        self.obs_vtgt_space = self.vtgt.vtgt_space

    def reset(self, project=True, seed=None, init_pose=None, obs_as_dict=True):
        self.t = 0
        self.init_reward()
        self.vtgt.reset(version=self.difficulty, seed=seed)

        self.footstep['n'] = 0
        self.footstep['new'] = False
        self.footstep['r_contact'] = 1
        self.footstep['l_contact'] = 1


        # initialize state
        self.osim_model.state = self.osim_model.model.initializeState()
        if init_pose is None:
            init_pose = self.INIT_POSE
        state = self.osim_model.get_state()
        QQ = state.getQ()
        QQDot = state.getQDot()
        for i in range(17):
            QQDot[i] = 0
        QQ[3] = 0 # x: (+) forward
        QQ[5] = 0 # z: (+) right
        QQ[1] = 0*np.pi/180 # roll
        QQ[2] = 0*np.pi/180 # yaw
        QQDot[3] = init_pose[0] # forward speed
        QQDot[5] = init_pose[1] # right speed
        QQ[4] = init_pose[2] # pelvis height
        QQ[0] = -init_pose[3] # trunk lean: (+) backward
        QQ[7] = -init_pose[4] # right hip abduct
        QQ[6] = -init_pose[5] # right hip flex
        QQ[13] = init_pose[6] # right knee extend
        QQ[15] = -init_pose[7] # right ankle flex
        QQ[10] = -init_pose[8] # left hip adduct
        QQ[9] = -init_pose[9] # left hip flex
        QQ[14] = init_pose[10] # left knee extend
        QQ[16] = -init_pose[11] # left ankle flex

        state.setQ(QQ)
        state.setU(QQDot)

        #Shakiba
        stateNew = state.getY()
        
        #'''
        # W23 (late swing), starting from reset model, new Omega
        New = [-0.154397,7.89708e-15,6.50271e-15,11.3305,0.917347,-3.09239e-15,0.587762,-4.85294e-15,7.06279e-15,-0.106138,3.06046e-15,2.94342e-16,-0.0872665,0.0893034,-0.00694239,
        0.0978685,0.0310454,0.321948,2.4383e-15,-5.5404e-16,1.25969,-0.00717378,-1.28051e-15,-1.18733,-1.25221e-15,1.63945e-15,-0.694707,1.00682e-15,-7.62523e-16,2.25543e-14,-1.43075,
        -1.86683,-6.30358,-0.255911,0.1,0.0888518,0.0881512,0.06403,0.310563,0.0944375,0.349698,0.134722,0.418565,0.189729,0.0628963,0.105986,0.015285,0.0302394,0.0830192,0.0741896,
        0.0105381,0.0642575,0.0101673,0.0514716,0.134338,0.0557335,0.1,0.074893,0.0881512,0.0552251,0.0131974,0.06221,0.0107428,0.134721,0.0161594,0.155271,0.448823,0.131448,0.0122363,
        0.0670305,0.0139359,0.0798503,0.552872,0.0464049,0.0903862,0.0453758,0.0880167,0.0602122]
        #'''

        '''
        # W0
        New = [-0.287299, -1.31682e-15, -3.66468e-14, 13.5088, 0.860736, 3.28012e-16, 0.674812, 1.41008e-14, 1.25091e-14, 0.0257695, -1.65809e-14, 1.93144e-14,
        -0.0872665, -0.285062, -0.640083, 0.09148, -0.0393004, -0.744687, 2.85253e-16, -5.36856e-15, 1.22813, 0.197466, -1.0203e-15, -1.21477, 2.36026e-15,
        2.12543e-15, 5.06283, -2.985e-15, 3.19925e-15, 2.52862e-14, 0.940134, -9.25827, 0.904554, 0.964788, 0.1, 0.0902055, 0.0881512, 0.0666291, 0.376295,
        0.0860412, 0.0420147, 0.127759, 0.516494, 0.193388, 0.0474489, 0.10222, 0.0331823, 0.0419731, 0.08084, 0.0916862, 0.0737822, 0.0550081, 0.0567051,
        0.0473013, 0.195988, 0.0570651, 0.1, 0.0776668, 0.0881512, 0.0550317, 0.0855877, 0.0443433, 0.199992, 0.117579, 0.112712, 0.161836, 0.994689, 0.126086,
        0.0118722, 0.0926901, 0.0100955, 0.108532, 0.205141, 0.0436463, 0.0454119, 0.0462103, 0.237019, 0.061203]
        '''

        New[3] = 0
        for i in range(len(New)):
            stateNew[i] = New[i]
        state.setY(stateNew)
        #'''

        self.osim_model.set_state(state)
        #Shakiba: comment out equilibrateMuscles
        self.osim_model.model.equilibrateMuscles(self.osim_model.state)


        self.osim_model.state.setTime(0)
        self.osim_model.istep = 0

        self.osim_model.reset_manager()

        d = super(L2M2019Env, self).get_state_desc()
        pose = np.array([d['body_pos']['pelvis'][0], -d['body_pos']['pelvis'][2], d['joint_pos']['ground_pelvis'][2]])
        self.v_tgt_field, self.flag_new_v_tgt_field = self.vtgt.update(pose)

        if not project:
            return self.get_state_desc()
        if obs_as_dict:
            return self.get_observation_dict()
        return self.get_observation()

    def load_model(self, model_path = None):
        super(L2M2019Env, self).load_model(model_path)
        observation_space = np.concatenate((self.obs_vtgt_space, self.obs_body_space), axis=1)
        self.observation_space = convert_to_gym(observation_space)

    def step(self, action, project=True, obs_as_dict=True):
        action_mapped = [action[i] for i in self.act2mus]

        _, reward, done, info = super(L2M2019Env, self).step(action_mapped, project=project, obs_as_dict=obs_as_dict)
        self.t += self.osim_model.stepsize
        self.update_footstep()

        d = super(L2M2019Env, self).get_state_desc()
        self.pose = np.array([d['body_pos']['pelvis'][0], -d['body_pos']['pelvis'][2], d['joint_pos']['ground_pelvis'][2]])
        self.v_tgt_field, self.flag_new_v_tgt_field = self.vtgt.update(self.pose)

        if project:
            if obs_as_dict:
                obs = self.get_observation_dict()
            else:
                obs = self.get_observation()
        else:
            obs = self.get_state_desc()

        return obs, reward, done, info

    def change_model(self, model='3D', difficulty=3, seed=0):
        if self.model != model:
            self.model = model
            self.load_model(self.model_paths[self.get_model_key()])
        self.set_difficulty(difficulty)

    def is_done(self):
        state_desc = self.get_state_desc()
        return state_desc['body_pos']['pelvis'][1] < 0.6

    def update_footstep(self):
        state_desc = self.get_state_desc()

        # update contact
        r_contact = True if state_desc['forces']['foot_r'][1] < -0.05*(self.MASS*self.G) else False
        l_contact = True if state_desc['forces']['foot_l'][1] < -0.05*(self.MASS*self.G) else False

        self.footstep['new'] = False
        if (not self.footstep['r_contact'] and r_contact) or (not self.footstep['l_contact'] and l_contact):
            self.footstep['new'] = True
            self.footstep['n'] += 1

        self.footstep['r_contact'] = r_contact
        self.footstep['l_contact'] = l_contact

    def get_observation_dict(self):
        state_desc = self.get_state_desc()

        obs_dict = {}

        obs_dict['v_tgt_field'] = state_desc['v_tgt_field']

        # pelvis state (in local frame)
        obs_dict['pelvis'] = {}
        obs_dict['pelvis']['height'] = state_desc['body_pos']['pelvis'][1]
        obs_dict['pelvis']['pitch'] = -state_desc['joint_pos']['ground_pelvis'][0] # (+) pitching forward
        obs_dict['pelvis']['roll'] = state_desc['joint_pos']['ground_pelvis'][1] # (+) rolling around the forward axis (to the right)
        yaw = state_desc['joint_pos']['ground_pelvis'][2]
        dx_local, dy_local = rotate_frame(  state_desc['body_vel']['pelvis'][0],
                                            state_desc['body_vel']['pelvis'][2],
                                            yaw)
        dz_local = state_desc['body_vel']['pelvis'][1]
        obs_dict['pelvis']['vel'] = [   dx_local, # (+) forward
                                        -dy_local, # (+) leftward
                                        dz_local, # (+) upward
                                        -state_desc['joint_vel']['ground_pelvis'][0], # (+) pitch angular velocity
                                        state_desc['joint_vel']['ground_pelvis'][1], # (+) roll angular velocity
                                        state_desc['joint_vel']['ground_pelvis'][2]] # (+) yaw angular velocity

        # leg state
        for leg, side in zip(['r_leg', 'l_leg'], ['r', 'l']):
            obs_dict[leg] = {}
            grf = [ f/(self.MASS*self.G) for f in state_desc['forces']['foot_{}'.format(side)][0:3] ] # forces normalized by bodyweight
            grm = [ m/(self.MASS*self.G) for m in state_desc['forces']['foot_{}'.format(side)][3:6] ] # moments normalized by bodyweight
            grfx_local, grfy_local = rotate_frame(-grf[0], -grf[2], yaw)
            if leg == 'r_leg':
                obs_dict[leg]['ground_reaction_forces'] = [ grfx_local, # (+) forward
                                                            grfy_local, # (+) lateral (rightward)
                                                            -grf[1]] # (+) upward
            if leg == 'l_leg':
                obs_dict[leg]['ground_reaction_forces'] = [ grfx_local, # (+) forward
                                                            -grfy_local, # (+) lateral (leftward)
                                                            -grf[1]] # (+) upward

            # joint angles
            obs_dict[leg]['joint'] = {}
            obs_dict[leg]['joint']['hip_abd'] = -state_desc['joint_pos']['hip_{}'.format(side)][1] # (+) hip abduction
            obs_dict[leg]['joint']['hip'] = -state_desc['joint_pos']['hip_{}'.format(side)][0] # (+) extension
            obs_dict[leg]['joint']['knee'] = state_desc['joint_pos']['knee_{}'.format(side)][0] # (+) extension
            obs_dict[leg]['joint']['ankle'] = -state_desc['joint_pos']['ankle_{}'.format(side)][0] # (+) extension
            # joint angular velocities
            obs_dict[leg]['d_joint'] = {}
            obs_dict[leg]['d_joint']['hip_abd'] = -state_desc['joint_vel']['hip_{}'.format(side)][1] # (+) hip abduction
            obs_dict[leg]['d_joint']['hip'] = -state_desc['joint_vel']['hip_{}'.format(side)][0] # (+) extension
            obs_dict[leg]['d_joint']['knee'] = state_desc['joint_vel']['knee_{}'.format(side)][0] # (+) extension
            obs_dict[leg]['d_joint']['ankle'] = -state_desc['joint_vel']['ankle_{}'.format(side)][0] # (+) extension

            # muscles
            for MUS, mus in zip(    ['HAB', 'HAD', 'HFL', 'GLU', 'HAM', 'RF', 'VAS', 'BFSH', 'GAS', 'SOL', 'TA'],
                                    ['abd', 'add', 'iliopsoas', 'glut_max', 'hamstrings', 'rect_fem', 'vasti', 'bifemsh', 'gastroc', 'soleus', 'tib_ant']):
                obs_dict[leg][MUS] = {}
                obs_dict[leg][MUS]['f'] = state_desc['muscles']['{}_{}'.format(mus,side)]['fiber_force']/self.Fmax[leg][MUS]
                obs_dict[leg][MUS]['l'] = state_desc['muscles']['{}_{}'.format(mus,side)]['fiber_length']/self.lopt[leg][MUS]
                obs_dict[leg][MUS]['v'] = state_desc['muscles']['{}_{}'.format(mus,side)]['fiber_velocity']/self.lopt[leg][MUS]

        return obs_dict

    ## Values in the observation vector
    # 'vtgt_field': vtgt vectors in body frame (2*11*11 = 242 values)
    # 'pelvis': height, pitch, roll, 6 vel (9 values)
    # for each 'r_leg' and 'l_leg' (*2)
    #   'ground_reaction_forces' (3 values)
    #   'joint' (4 values)
    #   'd_joint' (4 values)
    #   for each of the eleven muscles (*11)
    #       normalized 'f', 'l', 'v' (3 values)
    # 242 + 9 + 2*(3 + 4 + 4 + 11*3) = 339
    def get_observation(self):
        obs_dict = self.get_observation_dict()

        # Augmented environment from the L2R challenge
        res = []

        # target velocity field (in body frame)
        v_tgt = np.ndarray.flatten(obs_dict['v_tgt_field'])
        res += v_tgt.tolist()

        res.append(obs_dict['pelvis']['height'])
        res.append(obs_dict['pelvis']['pitch'])
        res.append(obs_dict['pelvis']['roll'])
        res.append(obs_dict['pelvis']['vel'][0]/self.LENGTH0)
        res.append(obs_dict['pelvis']['vel'][1]/self.LENGTH0)
        res.append(obs_dict['pelvis']['vel'][2]/self.LENGTH0)
        res.append(obs_dict['pelvis']['vel'][3])
        res.append(obs_dict['pelvis']['vel'][4])
        res.append(obs_dict['pelvis']['vel'][5])

        for leg in ['r_leg', 'l_leg']:
            res += obs_dict[leg]['ground_reaction_forces']
            res.append(obs_dict[leg]['joint']['hip_abd'])
            res.append(obs_dict[leg]['joint']['hip'])
            res.append(obs_dict[leg]['joint']['knee'])
            res.append(obs_dict[leg]['joint']['ankle'])
            res.append(obs_dict[leg]['d_joint']['hip_abd'])
            res.append(obs_dict[leg]['d_joint']['hip'])
            res.append(obs_dict[leg]['d_joint']['knee'])
            res.append(obs_dict[leg]['d_joint']['ankle'])
            for MUS in ['HAB', 'HAD', 'HFL', 'GLU', 'HAM', 'RF', 'VAS', 'BFSH', 'GAS', 'SOL', 'TA']:
                res.append(obs_dict[leg][MUS]['f'])
                res.append(obs_dict[leg][MUS]['l'])
                res.append(obs_dict[leg][MUS]['v'])
        return res

    def get_observation_clipped(self):
        obs = self.get_observation()
        return np.clip(obs, self.observation_space.low, self.observation_space.high)

    def get_observation_space_size(self):
        return 339

    def get_state_desc(self):
        d = super(L2M2019Env, self).get_state_desc()
        #state_desc['joint_pos']
        #state_desc['joint_vel']
        #state_desc['joint_acc']
        #state_desc['body_pos']
        #state_desc['body_vel']
        #state_desc['body_acc']
        #state_desc['body_pos_rot']
        #state_desc['body_vel_rot']
        #state_desc['body_acc_rot']
        #state_desc['forces']
        #state_desc['muscles']
        #state_desc['markers']
        #state_desc['misc']
        if self.difficulty in [0, 1, 2, 3]:
            d['v_tgt_field'] = self.v_tgt_field # shape: (2, 11, 11)
        else:
            raise ValueError("difficulty level should be in [0, 1, 2, 3].")
        return d

    def init_reward(self):
        self.init_reward_1()

    def init_reward_1(self):
        self.d_reward = {}

        self.d_reward['weight'] = {}
        self.d_reward['weight']['footstep'] = 10
        self.d_reward['weight']['effort'] = 1
        self.d_reward['weight']['v_tgt'] = 1
        self.d_reward['weight']['v_tgt_R2'] = 3

        self.d_reward['alive'] = 0.1
        self.d_reward['effort'] = 0

        self.d_reward['footstep'] = {}
        self.d_reward['footstep']['effort'] = 0
        self.d_reward['footstep']['del_t'] = 0
        self.d_reward['footstep']['del_v'] = 0

    def get_reward(self):
        if self.difficulty == 3: # Round 2
            return self.get_reward_2()
        return self.get_reward_1()

    def get_reward_1(self): # for L2M2019 Round 1
        state_desc = self.get_state_desc()
        if not self.get_prev_state_desc():
            return 0

        reward = 0
        dt = self.osim_model.stepsize

        # alive reward
        # should be large enough to search for 'success' solutions (alive to the end) first
        reward += self.d_reward['alive']

        # effort ~ muscle fatigue ~ (muscle activation)^2
        ACT2 = 0
        for muscle in sorted(state_desc['muscles'].keys()):
            ACT2 += np.square(state_desc['muscles'][muscle]['activation'])
        self.d_reward['effort'] += ACT2*dt
        self.d_reward['footstep']['effort'] += ACT2*dt

        self.d_reward['footstep']['del_t'] += dt

        # reward from velocity (penalize from deviating from v_tgt)

        p_body = [state_desc['body_pos']['pelvis'][0], -state_desc['body_pos']['pelvis'][2]]
        v_body = [state_desc['body_vel']['pelvis'][0], -state_desc['body_vel']['pelvis'][2]]
        v_tgt = self.vtgt.get_vtgt(p_body).T

        self.d_reward['footstep']['del_v'] += (v_body - v_tgt)*dt

        # footstep reward (when made a new step)
        if self.footstep['new']:
            # footstep reward: so that solution does not avoid making footsteps
            # scaled by del_t, so that solution does not get higher rewards by making unnecessary (small) steps
            reward_footstep_0 = self.d_reward['weight']['footstep']*self.d_reward['footstep']['del_t']

            # deviation from target velocity
            # the average velocity a step (instead of instantaneous velocity) is used
            # as velocity fluctuates within a step in normal human walking
            #reward_footstep_v = -self.reward_w['v_tgt']*(self.footstep['del_vx']**2)
            reward_footstep_v = -self.d_reward['weight']['v_tgt']*np.linalg.norm(self.d_reward['footstep']['del_v'])/self.LENGTH0

            # penalize effort
            reward_footstep_e = -self.d_reward['weight']['effort']*self.d_reward['footstep']['effort']

            self.d_reward['footstep']['del_t'] = 0
            self.d_reward['footstep']['del_v'] = 0
            self.d_reward['footstep']['effort'] = 0

            reward += reward_footstep_0 + reward_footstep_v + reward_footstep_e

        # success bonus
        if not self.is_done() and (self.osim_model.istep >= self.spec.timestep_limit): #and self.failure_mode is 'success':
            # retrieve reward (i.e. do not penalize for the simulation terminating in the middle of a step)
            reward_footstep_0 = self.d_reward['weight']['footstep']*self.d_reward['footstep']['del_t']
            #reward += reward_footstep_0 + 100
            reward += reward_footstep_0 + 10

        return reward

    def get_reward_2(self): # for L2M2019 Round 2
        state_desc = self.get_state_desc()
        if not self.get_prev_state_desc():
            return 0

        reward = 0
        dt = self.osim_model.stepsize

        # alive reward
        # should be large enough to search for 'success' solutions (alive to the end) first
        reward += self.d_reward['alive']

        # effort ~ muscle fatigue ~ (muscle activation)^2
        ACT2 = 0
        for muscle in sorted(state_desc['muscles'].keys()):
            ACT2 += np.square(state_desc['muscles'][muscle]['activation'])
        self.d_reward['effort'] += ACT2*dt
        self.d_reward['footstep']['effort'] += ACT2*dt

        self.d_reward['footstep']['del_t'] += dt

        # reward from velocity (penalize from deviating from v_tgt)

        p_body = [state_desc['body_pos']['pelvis'][0], -state_desc['body_pos']['pelvis'][2]]
        v_body = [state_desc['body_vel']['pelvis'][0], -state_desc['body_vel']['pelvis'][2]]
        v_tgt = self.vtgt.get_vtgt(p_body).T

        self.d_reward['footstep']['del_v'] += (v_body - v_tgt)*dt

        # simulation ends successfully
        flag_success = (not self.is_done() # model did not fall down
            and (self.osim_model.istep >= self.spec.timestep_limit) # reached end of simulation
            and self.footstep['n'] > 5) # took more than 5 footsteps (to prevent standing still)

        # footstep reward (when made a new step)
        if self.footstep['new'] or flag_success:
            # footstep reward: so that solution does not avoid making footsteps
            # scaled by del_t, so that solution does not get higher rewards by making unnecessary (small) steps
            reward_footstep_0 = self.d_reward['weight']['footstep']*self.d_reward['footstep']['del_t']

            # deviation from target velocity
            # the average velocity a step (instead of instantaneous velocity) is used
            # as velocity fluctuates within a step in normal human walking
            #reward_footstep_v = -self.reward_w['v_tgt']*(self.footstep['del_vx']**2)
            reward_footstep_v = -self.d_reward['weight']['v_tgt_R2']*np.linalg.norm(self.d_reward['footstep']['del_v'])/self.LENGTH0

            # penalize effort
            reward_footstep_e = -self.d_reward['weight']['effort']*self.d_reward['footstep']['effort']

            self.d_reward['footstep']['del_t'] = 0
            self.d_reward['footstep']['del_v'] = 0
            self.d_reward['footstep']['effort'] = 0

            reward += reward_footstep_0 + reward_footstep_v + reward_footstep_e

        # task bonus: if stayed enough at the first target
        if self.flag_new_v_tgt_field:
            reward += 500

        return reward
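
The act2mus permutation in the comments above can be re-derived from the two muscle orderings. The sketch below is not part of the source; it only checks that the hard-coded indices match the documented action order and .osim file order.

# Re-derive act2mus from the documented orderings (sketch, not from the source).
action_order = ['HAB', 'HAD', 'HFL', 'GLU', 'HAM', 'RF', 'VAS', 'BFSH', 'GAS', 'SOL', 'TA']
osim_order   = ['HAB', 'HAD', 'HAM', 'BFSH', 'GLU', 'HFL', 'RF', 'VAS', 'GAS', 'SOL', 'TA']
one_leg = [action_order.index(m) for m in osim_order]   # action index feeding each .osim slot
act2mus = one_leg + [i + 11 for i in one_leg]           # same permutation for the second leg
assert act2mus == [0, 1, 4, 7, 3, 2, 5, 6, 8, 9, 10,
                   11, 12, 15, 18, 14, 13, 16, 17, 19, 20, 21]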