def configure_ve_her(params):
    """Build the DDPG and VE HER transition samplers from ``params``.

    Consumes 'replay_strategy'/'replay_k' (and their 've_'-prefixed
    counterparts) from ``params`` and stores the assembled kwarg dicts
    back under 'ddpg_her_params' / 've_her_params'.

    Returns a ``(ddpg_sample_transitions, ve_sample_transitions)`` pair.
    """
    env = cached_make_env(params['make_env'])
    env.reset(reset_goal=False)

    def reward_fun(ag_2, g, info):  # vectorized
        return env.compute_reward(achieved_goal=ag_2, desired_goal=g, info=info)

    # HER configuration for the DDPG sampler.
    ddpg_her_params = {'reward_fun': reward_fun}
    for key in ('replay_strategy', 'replay_k'):
        # pop() reads and removes the consumed setting in one step.
        ddpg_her_params[key] = params.pop(key)
    params['ddpg_her_params'] = ddpg_her_params
    ddpg_sample_transitions = make_sample_her_transitions(**ddpg_her_params)

    # HER configuration for the VE sampler (settings carry a 've_' prefix).
    ve_her_params = {'reward_fun': reward_fun}
    for key in ('replay_strategy', 'replay_k'):
        ve_her_params[key] = params.pop(f've_{key}')
    params['ve_her_params'] = ve_her_params
    ve_sample_transitions = make_sample_her_transitions(**ve_her_params)

    return ddpg_sample_transitions, ve_sample_transitions
def configure_ve_her(params):
    """Build the DDPG and VE HER transition samplers from ``params``.

    Each consumed setting ('replay_strategy', 'replay_k', and their
    've_'-prefixed counterparts) is removed from ``params`` and stashed
    back under an underscore-prefixed key for record keeping.

    Returns a ``(ddpg_sample_transitions, ve_sample_transitions)`` pair.
    """
    env = cached_make_env(params['make_env'])
    # try:
    #     env.reset()
    # except NotImplementedError:
    #     env.get_reset_obs()

    def reward_fun(ag_2, g, info):  # vectorized
        return env.compute_reward(achieved_goal=ag_2, desired_goal=g, info=info)

    # HER configuration for the DDPG sampler.
    ddpg_her_params = {'reward_fun': reward_fun}
    for key in ('replay_strategy', 'replay_k'):
        value = params.pop(key)
        ddpg_her_params[key] = value
        params['_' + key] = value  # record the consumed setting
    ddpg_sample_transitions = make_sample_her_transitions(**ddpg_her_params)

    # HER configuration for the VE sampler.
    ve_her_params = {'reward_fun': reward_fun}
    for key in ('replay_strategy', 'replay_k'):
        value = params.pop(f've_{key}')
        ve_her_params[key] = value
        params[f'_ve_{key}'] = value
    ve_sample_transitions = make_sample_her_transitions(**ve_her_params)

    return ddpg_sample_transitions, ve_sample_transitions
def configure_her(params):
    """Build the HER transition-sampling function described by ``params``.

    The sampler implementation is selected by ``params['prioritization']``:
    'energy' and 'tderror' use the corresponding prioritized variants,
    anything else falls back to uniform HER sampling.

    Consumes 'replay_strategy' and 'replay_k' from ``params``, stashing
    each back under a '_'-prefixed key.
    """
    # Fix: removed stray debug print ("Inside configure her") left in
    # library code.
    env = cached_make_env(params['make_env'])
    env.reset()

    def reward_fun(ag_2, g, info):  # vectorized
        return env.compute_reward(achieved_goal=ag_2, desired_goal=g, info=info)

    # Prepare configuration for HER.
    her_params = {
        'reward_fun': reward_fun,
    }
    for name in ['replay_strategy', 'replay_k']:
        her_params[name] = params[name]
        params['_' + name] = her_params[name]
        del params[name]

    # Dispatch on the prioritization scheme (lookup hoisted out of the chain).
    prioritization = params['prioritization']
    if prioritization == 'energy':
        sample_her_transitions = make_sample_her_transitions_energy(
            **her_params)
    elif prioritization == 'tderror':
        sample_her_transitions = make_sample_her_transitions_prioritized_replay(
            **her_params)
    else:
        sample_her_transitions = make_sample_her_transitions(**her_params)
    return sample_her_transitions
def configure_her(params):
    """Build the HER transition-sampling function described by ``params``.

    Prefers the environment's own ``compute_reward``; if constructing or
    resetting the environment fails, falls back to a hand-coded reward.

    Fixes over the original:
    - bare ``except:`` clauses (which also swallowed KeyboardInterrupt /
      SystemExit) narrowed to ``except Exception`` / an explicit ndim check;
    - exception-driven batched-vs-single dispatch replaced by ``ndim``;
    - dead ``info = info`` statement removed.
    The numeric computation itself is unchanged.
    """
    try:
        env = cached_make_env(params['make_env'])
        env.reset()

        def reward_fun(ag_2, g, info):  # vectorized
            return env.compute_reward(achieved_goal=ag_2, desired_goal=g, info=info)
    except Exception:
        # Fallback hand-coded reward.
        # NOTE(review): assumes goal layout [orientation, 4 mid components,
        # tail components] — confirm against the target environment.
        def reward_fun(ag_2, g, info):  # vectorized
            achieved_goal = np.asarray(ag_2)
            desired_goal = np.asarray(g)
            if achieved_goal.ndim > 1:  # batched goals: (N, dim)
                # NOTE(review): norm over the batch axis collapses d to a
                # scalar, same as the original code — verify this is intended.
                d = np.linalg.norm(achieved_goal[:, 0] - desired_goal[:, 0], axis=-1)
                diff_mid = achieved_goal[:, 1:5] - desired_goal[:, 1:5]
                diff_tail = achieved_goal[:, 5:] - desired_goal[:, 5:]
            else:  # single goal: (dim,)
                d = np.linalg.norm(achieved_goal[0] - desired_goal[0], axis=-1)
                diff_mid = achieved_goal[1:5] - desired_goal[1:5]
                diff_tail = achieved_goal[5:] - desired_goal[5:]
            # Penalize mid-goal error plus only the negative tail deviations.
            fragile_goal = (np.linalg.norm(diff_mid, axis=-1)
                            + np.linalg.norm(diff_tail * (diff_tail < 0), axis=-1))
            return -(d > np.pi / 16).astype(np.float32) - np.float32(fragile_goal) * 2.0

    # Prepare configuration for HER.
    her_params = {
        'reward_fun': reward_fun,
    }
    for name in ['replay_strategy', 'replay_k']:
        her_params[name] = params[name]
        params['_' + name] = her_params[name]
        del params[name]
    sample_her_transitions = make_sample_her_transitions(**her_params)
    return sample_her_transitions
def configure_her(params):
    """Build the HER transition-sampling function described by ``params``.

    Forwards ``params['policy_index']`` into the sampler configuration
    (used for subgoal rewards); consumes 'replay_strategy'/'replay_k'
    from ``params``, stashing each under a '_'-prefixed key.
    """
    env = cached_make_env(params['make_env'])
    env.reset()
    # goal_indexes = params['goal_indexes']
    policy_index = params['policy_index']
    # can add policy index here for subgoal rewards****

    def reward_fun(ag_2, g, info):  # vectorized
        return env.compute_reward(achieved_goal=ag_2, desired_goal=g, info=info)

    # Assemble the sampler kwargs.
    her_params = {
        'reward_fun': reward_fun,
        # 'policy_indexes': goal_indexes,
        'policy_index': policy_index,
    }
    for key in ('replay_strategy', 'replay_k'):
        value = params.pop(key)
        her_params[key] = value
        params['_' + key] = value  # record the consumed setting
    return make_sample_her_transitions(**her_params)
def configure_her(params):
    """Build the HER transition-sampling function described by ``params``.

    Consumes 'replay_strategy' and 'replay_k' from ``params``, stashing
    each back under a '_'-prefixed key.
    """
    env = cached_make_env(params['make_env'])
    env.reset()

    def reward_fun(ag_2, g, info):  # vectorized
        return env.compute_reward(achieved_goal=ag_2, desired_goal=g, info=info)

    her_params = {'reward_fun': reward_fun}
    for key in ('replay_strategy', 'replay_k'):
        value = params.pop(key)
        her_params[key] = value
        params['_' + key] = value  # record the consumed setting
    return make_sample_her_transitions(**her_params)
def configure_her(params):
    """Configure HER transition sampling from ``params``.

    Moves 'replay_strategy' and 'replay_k' out of ``params`` (each is
    stashed back under a '_'-prefixed key) and returns the sampler.
    """
    env = cached_make_env(params['make_env'])
    env.reset()

    def reward_fun(ag_2, g, info):  # vectorized
        return env.compute_reward(achieved_goal=ag_2, desired_goal=g, info=info)

    consumed = ('replay_strategy', 'replay_k')
    her_params = {'reward_fun': reward_fun}
    her_params.update({k: params[k] for k in consumed})
    for k in consumed:
        params['_' + k] = her_params[k]  # keep a record, then remove
        del params[k]
    sample_her_transitions = make_sample_her_transitions(**her_params)
    return sample_her_transitions
def configure_her(params):
    """Build a HER transition-sampling function with an extra shaping term.

    The reward adds ``w * min_obj_ori_goal`` (w = -3.0) to the env reward,
    where ``min_obj_ori_goal`` is the smallest norm over observation rows
    of ``row[6:9] + [0, 0, 0.03]``.

    Fixes over the original:
    - unused ``max_obj_ori_goal`` tracking and unused ``global minDist``
      removed;
    - per-row triple recomputation of ``np.linalg.norm`` replaced by one
      vectorized pass;
    - a true minimum is now returned even when every norm exceeds the old
      hard-coded sentinel 1000 (sentinel kept only for empty input).
    """
    env = cached_make_env(params['make_env'])
    env.reset()

    def _min_obj_ori_goal(curr_obs):
        """Smallest norm of row[6:9] + (0, 0, 0.03) over all obs rows.

        NOTE(review): columns 6:9 presumably hold an object-relative
        position — confirm against the observation layout.
        Returns 1000 for empty input (original sentinel behavior).
        """
        obs = np.asarray(curr_obs)
        if obs.shape[0] == 0:
            return 1000
        shifted = obs[:, 6:9] + np.array([0.0, 0.0, 0.03])
        return np.linalg.norm(shifted, axis=-1).min()

    def reward_fun(o, ag_2, g, info):  # vectorized
        w = -3.0  # shaping weight (w = -5.0 showed 0.9 in earlier runs)
        return w * _min_obj_ori_goal(o) + env.compute_reward(
            achieved_goal=ag_2, desired_goal=g, info=info)

    # Prepare configuration for HER.
    her_params = {
        'reward_fun': reward_fun,
    }
    for name in ['replay_strategy', 'replay_k']:
        her_params[name] = params[name]
        params['_' + name] = her_params[name]
        del params[name]
    sample_her_transitions = make_sample_her_transitions(**her_params)
    return sample_her_transitions