Beispiel #1
0
    gymenv = GymEnv(args.env,
                    force_reset=True,
                    record_video=False,
                    record_log=False)
    # gymenv.env.seed(1)
else:
    gymenv = other_env_class_map[args.env]()

# TODO: assert continuous space
# NOTE(review): presumably "space" means the action space, given that the
# policy and Q-function built below are the deterministic / continuous
# variants -- confirm before implementing the assertion.

# Wrap the selected environment: normalize() is applied first, then TfEnv.
env = TfEnv(normalize(gymenv))

# Deterministic MLP policy, sized from the wrapped environment's spec.
policy = DeterministicMLPPolicy(
    env_spec=env.spec,
    name="policy",
    # Three hidden layers with 100, 50 and 25 units respectively.
    hidden_sizes=(100, 50, 25),
    # ReLU activation on the hidden layers.
    hidden_nonlinearity=tf.nn.relu,
)

# Exploration strategy built from the same environment spec.
# NOTE(review): "OU" presumably stands for Ornstein-Uhlenbeck noise -- confirm
# against the OUStrategy definition.
es = OUStrategy(env_spec=env.spec)

# Q-function approximator: an MLP with two hidden layers of 100 units each
# and ReLU activations, sized from the environment spec.
qf = ContinuousMLPQFunction(
    env_spec=env.spec,
    hidden_sizes=(100, 100),
    hidden_nonlinearity=tf.nn.relu,
)

# Registry mapping the value of args.type to the algorithm class to use;
# "regular" is the only entry defined here.
ddpg_type_map = {"regular": DDPG}

# Select the algorithm class. An args.type value that is not a key of the
# registry raises KeyError.
ddpg_class = ddpg_type_map[args.type]