def create_bc_module(mock_behavior_specs, bc_settings, use_rnn, tanhresample):
    """Build a ``BCModule`` on top of a freshly created ``TFPolicy``.

    Args:
        mock_behavior_specs: Behavior specs handed to the ``TFPolicy``
            constructor.
        bc_settings: Passed to ``BCModule`` as its ``settings`` argument.
        use_rnn: When truthy, the network settings get a default
            ``NetworkSettings.MemorySettings()``; otherwise memory is ``None``.
        tanhresample: Forwarded as both the fourth and fifth positional
            argument of ``TFPolicy``.

    Returns:
        The constructed ``BCModule``. Its policy has already been initialized.
    """
    settings = TrainerSettings()
    if use_rnn:
        settings.network_settings.memory = NetworkSettings.MemorySettings()
    else:
        settings.network_settings.memory = None

    # The same flag is deliberately supplied for both positional slots.
    tf_policy = TFPolicy(
        0,
        mock_behavior_specs,
        settings,
        tanhresample,
        tanhresample,
    )

    # The module is built while the policy's graph is the default graph.
    with tf_policy.graph.as_default():
        module = BCModule(
            tf_policy,
            policy_learning_rate=settings.hyperparameters.learning_rate,
            default_batch_size=settings.hyperparameters.batch_size,
            default_num_epoch=3,
            settings=bc_settings,
        )

    # Outside of tests the optimizer normally performs this call once the
    # BCModule exists; here it has to be done by hand.
    tf_policy.initialize()
    return module
def test_step_overflow():
    """Verify the policy step counter keeps counting past ``2**31 - 1``.

    The step is set to ``2**31 - 1``, read back, incremented by 3, and read
    back again; the second read must equal ``2**31 + 2``.
    """
    boundary = 2**31 - 1
    increment = 3

    spec = mb.setup_test_behavior_specs(
        use_discrete=True,
        use_visual=False,
        vector_action_space=[2],
        vector_obs_space=1,
    )
    network_settings = NetworkSettings(normalize=True)
    trainer_settings = TrainerSettings(network_settings=network_settings)
    tf_policy = TFPolicy(0, spec, trainer_settings, create_tf_graph=False)

    # The full graph is not created, so only the input placeholders are set
    # up before initializing.
    tf_policy.create_input_placeholders()
    tf_policy.initialize()

    tf_policy.set_step(boundary)
    assert tf_policy.get_current_step() == boundary

    tf_policy.increment_step(increment)
    # boundary + 3 == 2**31 + 2
    assert tf_policy.get_current_step() == boundary + increment
def create_sac_optimizer_mock(dummy_config, use_rnn, use_discrete, use_visual):
    """Create a ``SACOptimizer`` wrapped around a test ``TFPolicy``.

    Note that ``dummy_config`` is modified in place: its
    ``network_settings.memory`` attribute is overwritten.

    Args:
        dummy_config: Trainer settings used for both the policy and the
            optimizer.
        use_rnn: When truthy, memory settings with ``sequence_length=16`` and
            ``memory_size=10`` are installed; otherwise memory is ``None``.
        use_discrete: Selects ``DISCRETE_ACTION_SPACE`` over
            ``VECTOR_ACTION_SPACE`` for the behavior specs.
        use_visual: When truthy, the vector observation space is ``0``
            instead of ``VECTOR_OBS_SPACE``.

    Returns:
        The ``SACOptimizer``; its policy has been initialized.
    """
    if use_discrete:
        action_space = DISCRETE_ACTION_SPACE
    else:
        action_space = VECTOR_ACTION_SPACE

    if use_visual:
        obs_space = 0
    else:
        obs_space = VECTOR_OBS_SPACE

    behavior_specs = mb.setup_test_behavior_specs(
        use_discrete,
        use_visual,
        vector_action_space=action_space,
        vector_obs_space=obs_space,
    )

    if use_rnn:
        memory = NetworkSettings.MemorySettings(sequence_length=16, memory_size=10)
    else:
        memory = None
    # Written straight onto the caller's config object (no copy is made).
    dummy_config.network_settings.memory = memory

    tf_policy = TFPolicy(
        0,
        behavior_specs,
        dummy_config,
        "test",
        False,
        create_tf_graph=False,
    )
    sac_optimizer = SACOptimizer(tf_policy, dummy_config)
    # The policy is initialized only after the optimizer has been built.
    tf_policy.initialize()
    return sac_optimizer
def _create_ppo_optimizer_ops_mock(dummy_config, use_rnn, use_discrete, use_visual):
    """Create a ``PPOOptimizer`` wrapped around a test ``TFPolicy``.

    Args:
        dummy_config: Base trainer settings; a variant with
            ``framework=FrameworkType.TENSORFLOW`` is derived from it via
            ``attr.evolve``.
        use_rnn: When truthy, memory settings with ``sequence_length=16`` and
            ``memory_size=10`` are installed; otherwise memory is ``None``.
        use_discrete: Selects ``DISCRETE_ACTION_SPACE`` over
            ``VECTOR_ACTION_SPACE`` for the behavior specs.
        use_visual: Forwarded to ``mb.setup_test_behavior_specs``.

    Returns:
        The ``PPOOptimizer``; its policy has been initialized.
    """
    if use_discrete:
        action_space = DISCRETE_ACTION_SPACE
    else:
        action_space = VECTOR_ACTION_SPACE

    behavior_specs = mb.setup_test_behavior_specs(
        use_discrete,
        use_visual,
        vector_action_space=action_space,
        vector_obs_space=VECTOR_OBS_SPACE,
    )

    tf_settings = attr.evolve(dummy_config, framework=FrameworkType.TENSORFLOW)

    if use_rnn:
        memory = NetworkSettings.MemorySettings(sequence_length=16, memory_size=10)
    else:
        memory = None
    # NOTE(review): the evolved settings presumably share the same
    # network_settings object as dummy_config, in which case this write is
    # visible through dummy_config as well - confirm.
    tf_settings.network_settings.memory = memory

    tf_policy = TFPolicy(
        0,
        behavior_specs,
        tf_settings,
        "test",
        False,
        create_tf_graph=False,
    )
    ppo_optimizer = PPOOptimizer(tf_policy, tf_settings)
    # The policy is initialized only after the optimizer has been built.
    tf_policy.initialize()
    return ppo_optimizer