gymenv = GymEnv(args.env, force_reset=True, record_video=False, record_log=False) # gymenv.env.seed(1) else: gymenv = other_env_class_map[args.env]() #TODO: assert continuous space env = TfEnv(normalize(gymenv)) policy = DeterministicMLPPolicy( env_spec=env.spec, name="policy", # The neural network policy should have two hidden layers, each with 32 hidden units. hidden_sizes=(100, 50, 25), hidden_nonlinearity=tf.nn.relu, ) es = OUStrategy(env_spec=env.spec) qf = ContinuousMLPQFunction( env_spec=env.spec, hidden_sizes=(100, 100), hidden_nonlinearity=tf.nn.relu, ) ddpg_type_map = {"regular": DDPG} ddpg_class = ddpg_type_map[args.type]