def test_mopo_impl(
    observation_shape,
    action_size,
    learning_rate,
    optim_factory,
    encoder_factory,
    n_ensembles,
    lam,
    discrete_action,
    scaler,
):
    """Instantiate ``MOPOImpl``, build it, and hand it to ``torch_impl_tester``.

    The first eight parameters are forwarded to the constructor positionally,
    in the order they appear in this signature; ``scaler`` is passed by
    keyword together with ``use_gpu=False``. ``discrete_action`` is also
    forwarded to the tester as ``discrete``.
    """
    # Positional constructor arguments, kept in signature order.
    positional = (
        observation_shape,
        action_size,
        learning_rate,
        optim_factory,
        encoder_factory,
        n_ensembles,
        lam,
        discrete_action,
    )
    impl = MOPOImpl(*positional, use_gpu=False, scaler=scaler)
    impl.build()

    torch_impl_tester(impl, discrete=discrete_action)
def test_mopo_impl(
    observation_shape,
    action_size,
    learning_rate,
    optim_factory,
    encoder_factory,
    n_ensembles,
    lam,
    discrete_action,
    scaler,
    action_scaler,
):
    """Instantiate ``MOPOImpl`` by keyword, build it, and run ``torch_impl_tester``.

    Every constructor argument is passed by name, with ``use_gpu=None``.
    When ``discrete_action`` is truthy the ``action_scaler`` argument is
    replaced by ``None``; otherwise it is forwarded unchanged.
    ``discrete_action`` is also forwarded to the tester as ``discrete``.
    """
    # The action scaler is only passed through for non-discrete actions.
    effective_action_scaler = None if discrete_action else action_scaler

    ctor_kwargs = {
        "observation_shape": observation_shape,
        "action_size": action_size,
        "learning_rate": learning_rate,
        "optim_factory": optim_factory,
        "encoder_factory": encoder_factory,
        "n_ensembles": n_ensembles,
        "lam": lam,
        "discrete_action": discrete_action,
        "use_gpu": None,
        "scaler": scaler,
        "action_scaler": effective_action_scaler,
    }
    impl = MOPOImpl(**ctor_kwargs)
    impl.build()

    torch_impl_tester(impl, discrete=discrete_action)
def test_probabilistic_ensemble_dynamics_impl(
    observation_shape,
    action_size,
    learning_rate,
    optim_factory,
    encoder_factory,
    n_ensembles,
    variance_type,
    discrete_action,
    scaler,
    action_scaler,
    reward_scaler,
):
    """Instantiate ``ProbabilisticEnsembleDynamicsImpl``, build it, and test it.

    Every constructor argument is passed by name, with ``use_gpu=None``.
    When ``discrete_action`` is truthy the ``action_scaler`` argument is
    replaced by ``None``; otherwise it is forwarded unchanged. The tester
    receives ``discrete_action`` (as ``discrete``) and ``n_ensembles``.
    """
    # The action scaler is only passed through for non-discrete actions.
    effective_action_scaler = None if discrete_action else action_scaler

    ctor_kwargs = {
        "observation_shape": observation_shape,
        "action_size": action_size,
        "learning_rate": learning_rate,
        "optim_factory": optim_factory,
        "encoder_factory": encoder_factory,
        "n_ensembles": n_ensembles,
        "variance_type": variance_type,
        "discrete_action": discrete_action,
        "use_gpu": None,
        "scaler": scaler,
        "action_scaler": effective_action_scaler,
        "reward_scaler": reward_scaler,
    }
    impl = ProbabilisticEnsembleDynamicsImpl(**ctor_kwargs)
    impl.build()

    torch_impl_tester(
        impl,
        discrete=discrete_action,
        n_ensembles=n_ensembles,
    )
def test_mopo_impl(
    observation_shape,
    action_size,
    learning_rate,
    eps,
    weight_decay,
    n_ensembles,
    lam,
    use_batch_norm,
    discrete_action,
    scaler,
):
    """Instantiate ``MOPOImpl`` and hand it straight to ``torch_impl_tester``.

    The first nine parameters are forwarded to the constructor positionally,
    in the order they appear in this signature; ``scaler`` is passed by
    keyword together with ``use_gpu=False``. No separate build step is
    invoked here. ``discrete_action`` is also forwarded to the tester as
    ``discrete``.
    """
    # Positional constructor arguments, kept in signature order.
    positional = (
        observation_shape,
        action_size,
        learning_rate,
        eps,
        weight_decay,
        n_ensembles,
        lam,
        use_batch_norm,
        discrete_action,
    )
    impl = MOPOImpl(*positional, use_gpu=False, scaler=scaler)

    torch_impl_tester(impl, discrete=discrete_action)