import time

from pybullet_envs.bullet.tm700GymEnv_TEST import tm700GymEnv2
from stable_baselines import DDPG
from stable_baselines.ddpg.policies import MlpPolicy


def main():
    param_noise = None
    env1 = tm700GymEnv2(renders=False, isDiscrete=False)
    model = DDPG(MlpPolicy, env1, verbose=1, param_noise=param_noise,
                 random_exploration=0.1)
    # model = DQN(MlpPolicy, env1, verbose=1, exploration_fraction=0.3)

    start = time.time()
    model.learn(total_timesteps=1000000)
    # Old deepq-style kwargs, kept for reference:
    #   max_timesteps=10000000, exploration_fraction=0.1,
    #   exploration_final_eps=0.02, print_freq=10,
    #   callback=callback, network='mlp'  (= deepq.models.mlp([64]))

    print("Saving model")
    model.save("tm_test_model_randomblocksrotated.pkl")
    print('total time', time.time() - start)


if __name__ == '__main__':
    main()
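# --- Illustrative sketch (not from the original script) ----------------------
# The param_noise slot above is left at None. As an alternative exploration
# scheme, stable-baselines' DDPG also accepts action noise; the variant below
# uses Ornstein-Uhlenbeck action noise, a common choice for continuous control.
# The sigma of 0.1 is an assumed placeholder, not a tuned value.
import numpy as np
from stable_baselines.ddpg.noise import OrnsteinUhlenbeckActionNoise


def main_with_action_noise():
    env1 = tm700GymEnv2(renders=False, isDiscrete=False)
    n_actions = env1.action_space.shape[-1]
    action_noise = OrnsteinUhlenbeckActionNoise(mean=np.zeros(n_actions),
                                                sigma=0.1 * np.ones(n_actions))
    model = DDPG(MlpPolicy, env1, verbose=1, action_noise=action_noise)
    model.learn(total_timesteps=1000000)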
import os, inspect
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(os.path.dirname(currentdir))
os.sys.path.insert(0, parentdir)

import gym
from pybullet_envs.bullet.tm700GymEnv_TEST import tm700GymEnv2
from stable_baselines import DQN, PPO2, DDPG
from pybullet_envs.baselines.train_tm700_multivec import evaluate, record_video

#################### PARAMETERS
savedmodel = "tm_test_model_randomblocks.pkl"
env = tm700GymEnv2(renders=True, isDiscrete=False)
model = DDPG.load(savedmodel, env=env)

########## run simulation
def runsimulation(model, env, iterations):
    obs = env.reset()
    time_step_counter = 0
    while time_step_counter < iterations:
        action, _ = model.predict(obs)
        # Assumption: evaluation is conducted on a single env only!
        obs, rewards, dones, _ = env.step(action)
        time_step_counter += 1
        # time.sleep(0.1)
        if dones:
            obs = env.reset()
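# --- Illustrative sketch (not from the original script) ----------------------
# Assumed usage: roll out the loaded DDPG policy with the helper above for a
# fixed timestep budget. The 10000-step budget is an arbitrary placeholder;
# the imported evaluate/record_video helpers are left untouched since their
# signatures are not shown here.
runsimulation(model, env, 10000)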
""" Utility function for multiprocessed env. :param env_id: (str) the environment ID :param seed: (int) the inital seed for RNG :param rank: (int) index of the subprocess """ def _init(): env = gym.make(env_id) # Important: use a different seed for each environment env.seed(seed + rank) return env set_global_seeds(seed) return _init if __name__ == '__main__': env_id = tm700GymEnv2(renders=True, isDiscrete=True) # The different number of processes that will be used PROCESSES_TO_TEST = [1, 2, 4, 8, 16] NUM_EXPERIMENTS = 3 # RL algorithms can often be unstable, so we run several experiments (see https://arxiv.org/abs/1709.06560) TRAIN_STEPS = 5000 # Number of episodes for evaluation EVAL_EPS = 20 ALGO = PPO2 # We will create one environment to evaluate the agent on eval_env = DummyVecEnv([lambda: gym.make(env_id)])
from pybullet_envs.bullet.tm700GymEnv_TEST import tm700GymEnv2
# from stable_baselines.common.env_checker import check_env

env = tm700GymEnv2()
# It will check your custom environment and output additional warnings if needed
# check_env(env)

obs = env.reset()
n_steps = 10
for _ in range(n_steps):
    # Random action
    action = env.action_space.sample()
    obs, reward, done, info = env.step(action)
    if done:
        obs = env.reset()
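# --- Illustrative sketch (not from the original script) ----------------------
# Assuming stable-baselines >= 2.9, the commented-out checker above can be run
# directly; it validates the custom env's observation/action spaces and the
# reset()/step() signatures against the Gym API, warning on common pitfalls.
from stable_baselines.common.env_checker import check_env

check_env(tm700GymEnv2(), warn=True)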