# Run bookkeeping: step counter used by callbacks, and the log directory.
n_steps = 0
os.makedirs(log_dir, exist_ok=True)

################ MODEL AND GYM ENVIRONMENT

# Table-driven environment selection: each key maps to its gym class, so the
# choice is exclusive and the Monitor wrapping is written exactly once instead
# of being duplicated in three separate `if` branches.
_ENV_CLASSES = {
    'rgbd': tm700_rgbd_gym,
    'rgb': tm700_rgb_gym,
    'possensor': tm700_possensor_gym,
}
if ENVIRONMENT in _ENV_CLASSES:
    env = _ENV_CLASSES[ENVIRONMENT](renders=RENDERS, isDiscrete=DISCRETE)
    # Monitor logs per-episode stats (reward, length) to monitor.csv.
    env = Monitor(env, os.path.join(log_dir, 'monitor.csv'),
                  allow_early_resets=True)

if MODEL == 'DQN':
    from stable_baselines.deepq.policies import LnCnnPolicy, MlpPolicy
    if ENVIRONMENT in ['rgbd', 'rgb', 'rgbdsparse']:
        # Image observations -> layer-normalised CNN policy.
        # DQN with double Q-learning and prioritized experience replay.
        model = DQN(LnCnnPolicy, env, verbose=1,
                    tensorboard_log=(log_dir + "tensorboard_%s_%s_%s/") % (MODEL, ENVIRONMENT, DATE),
                    gamma=0.99,
                    learning_rate=0.0001,
                    buffer_size=50000,
                    exploration_fraction=0.1,
                    exploration_final_eps=0.02,
                    train_freq=1,
                    batch_size=32,
                    double_q=True,
                    learning_starts=1000,
                    target_network_update_freq=500,
                    prioritized_replay=True,
                    prioritized_replay_alpha=0.6,
                    prioritized_replay_beta0=0.4,
                    prioritized_replay_beta_iters=None,
                    prioritized_replay_eps=1e-06,
                    param_noise=False,
                    _init_setup_model=True,
                    policy_kwargs=None,
                    full_tensorboard_log=False)
import logging

# Silence TensorFlow's verbose INFO/WARNING output.
tf.get_logger().setLevel(logging.ERROR)

args = parser.arg_parse()

# Create save dir. exist_ok=True avoids the racy check-then-create pattern
# (`if not os.path.exists(...): os.makedirs(...)`) of the original.
model_dir = args.model_dir
os.makedirs(model_dir, exist_ok=True)

############## load environment

# Keep the raw environment under its own name so the DummyVecEnv thunk is not
# a late-binding closure over a name that gets rebound on the next line
# (the original only worked because DummyVecEnv calls the thunk eagerly).
raw_env = tm700_possensor_gym(renders=False, isDiscrete=True)
# raw_env = gym.make('CartPole-v1')

# Vectorized environments allow to easily multiprocess training; the
# algorithms below require a vectorized environment to run.
env = DummyVecEnv([lambda: raw_env])

############ MODELS

# NOTE(review): PPO2 trains on the standalone 'Pendulum-v0' task, not the
# tm700 env built above — presumably a tutorial sanity check; confirm intent.
model = PPO2('MlpPolicy', 'Pendulum-v0', verbose=0).learn(8000)
# The model will be saved under PPO2_tutorial.zip
# ddpg_model = DDPG(MlpPolicy, env, verbose=1, param_noise=None, random_exploration=0.1)

# DQN + Prioritized Experience Replay + Double Q-Learning + Dueling
kwargs = {'double_q': True,
          'prioritized_replay': True,
          'policy_kwargs': dict(dueling=True)}
dqn_model = DQN('MlpPolicy', env, verbose=1, **kwargs)