def main(load_policy=False):
    """Train a TD3+HER pushing policy on the Panda push environment and save it.

    Builds ``pandaPushGymEnvHER`` (IK control, continuous actions, rendering
    off), wraps it in ``Monitor`` logging to the module-level ``log_dir``,
    trains for 5,000,000 timesteps using the module-level ``callback``, and
    finally saves the policy under ``../policies``.

    Args:
        load_policy: When true, start from the checkpoint stored under
            ``../policies/USEFUL_POLICIES`` instead of a newly constructed
            model. The same HER settings are applied in both cases.
    """
    model_class = TD3  # works also with SAC and DDPG
    action_space = 6
    fixed = True
    # 0 completely fixed, 1 slightly random radius, 2 big random radius
    object_position = 1
    timesteps = 5000000
    discreteAction = 0
    rend = False

    # NOTE(review): the original also defined normalize_observations, gamma
    # (0.9), memory_limit and normalize_returns here, but none of them was ever
    # passed to the model, so they had no effect and were removed. If gamma=0.9
    # was intended, it has to be forwarded to the model explicitly.

    env = pandaPushGymEnvHER(
        urdfRoot=robot_data.getDataPath(),
        renders=rend,
        useIK=1,
        isDiscrete=discreteAction,
        action_space=action_space,
        fixedPositionObj=fixed,
        includeVelObs=True,
        object_position=object_position,
    )
    # log_dir is only read here, so no ``global`` declaration is required.
    env = Monitor(env, log_dir, allow_early_resets=True)

    goal_selection_strategy = 'future'
    n_actions = env.action_space.shape[-1]
    action_noise = OrnsteinUhlenbeckActionNoise(
        mean=np.zeros(n_actions),
        sigma=0.5 * np.ones(n_actions),
    )

    # Settings shared by a newly built model and a reloaded one; keeping them
    # in one place stops the two code paths from drifting apart.
    her_kwargs = dict(
        n_sampled_goal=4,
        goal_selection_strategy=goal_selection_strategy,
        tensorboard_log="../pybullet_logs/panda_push_TD3/stable_baselines/PUSHING_TD3+HER_FIXED_POSITION_PHASE_1_IK",
        buffer_size=1000000,
        batch_size=256,
        random_exploration=0.3,
        action_noise=action_noise,
    )

    # Build exactly one model: the original always constructed a new HER model
    # and then threw it away whenever a saved policy was loaded.
    if load_policy:
        model = HER.load(
            "../policies/USEFUL_POLICIES/PUSHING_TD3+HER_FIXED_POSITIONbest_model.pkl",
            env=env,
            **her_kwargs
        )
    else:
        # Wrap the model
        model = HER(CustomTD3Policy, env, model_class, verbose=1, **her_kwargs)

    model.learn(timesteps, log_interval=100, callback=callback)

    print("Saving Policy PHASE_1")
    model.save("../policies/PUSHING_TD3+HER_FIXED_POSITION_PHASE_1_IK")
batch_size = 16 # -m memory_limit = 1000000 # -r normalize_returns = True # -t timesteps = 100000 policy_name = "pushing_policy" discreteAction = 0 rend = True env = pandaPushGymEnvHER(urdfRoot=robot_data.getDataPath(), renders=rend, useIK=0, isDiscrete=discreteAction, action_space=action_space, fixedPositionObj=fixed, includeVelObs=True, object_position=0, test_phase=True) goal_selection_strategy = 'future' # equivalent to GoalSelectionStrategy.FUTURE # Wrap the model model = HER.load("../policies/pushing_DDPG_HER_PHASE_1best_model.pkl", env=env) obs = env.reset() for _ in range(10000): action, _ = model.predict(obs) obs, reward, done, _ = env.step(action) if done: