def create_bc_module(mock_behavior_specs, bc_settings, use_rnn, tanhresample):
    """Build a BCModule on top of a freshly constructed NNPolicy.

    Args:
        mock_behavior_specs: Behavior specs handed to NNPolicy unchanged.
        bc_settings: Forwarded to BCModule as its ``settings`` argument.
        use_rnn: When truthy, the network memory is set to a default
            ``NetworkSettings.MemorySettings()``; otherwise it is ``None``.
        tanhresample: Passed as both of the last two positional NNPolicy
            arguments.

    Returns:
        The BCModule instance that was created for the new policy.
    """
    trainer_settings = TrainerSettings()
    if use_rnn:
        trainer_settings.network_settings.memory = NetworkSettings.MemorySettings()
    else:
        trainer_settings.network_settings.memory = None

    policy = NNPolicy(
        0,
        mock_behavior_specs,
        trainer_settings,
        False,
        "test",
        False,
        tanhresample,
        tanhresample,
    )

    # The module is constructed while the policy's graph is the default graph.
    with policy.graph.as_default():
        module = BCModule(
            policy,
            policy_learning_rate=trainer_settings.hyperparameters.learning_rate,
            default_batch_size=trainer_settings.hyperparameters.batch_size,
            default_num_epoch=3,
            settings=bc_settings,
        )

    # Outside of this helper the optimizer is normally the one that makes this
    # call, once the BCModule exists.
    # NOTE(review): placed after the graph context block -- confirm this
    # matches the intended layout.
    policy.initialize_or_load()
    return module
def create_bc_module(mock_brain, trainer_config, use_rnn, demo_file, tanhresample):
    """Build a BCModule on top of a freshly constructed NNPolicy.

    ``trainer_config`` is modified in place: ``model_path``,
    ``keep_checkpoints``, ``use_recurrent`` and the nested
    ``behavioral_cloning.demo_path`` entries are overwritten before the
    policy is created.

    Args:
        mock_brain: Handed to NNPolicy unchanged.
        trainer_config: Mapping of trainer options; must already contain
            ``behavioral_cloning``, ``learning_rate`` and ``batch_size``.
        use_rnn: Stored under ``trainer_config["use_recurrent"]``.
        demo_file: Demo file name, resolved relative to this file's directory.
        tanhresample: Passed as both of the last two positional NNPolicy
            arguments.

    Returns:
        The BCModule instance that was created for the new policy.
    """
    overrides = (
        ("model_path", "testpath"),
        ("keep_checkpoints", 3),
        ("use_recurrent", use_rnn),
    )
    for option, value in overrides:
        trainer_config[option] = value

    # The demo file is looked up next to this file.
    this_dir = os.path.dirname(os.path.abspath(__file__))
    trainer_config["behavioral_cloning"]["demo_path"] = this_dir + "/" + demo_file

    policy = NNPolicy(
        0,
        mock_brain,
        trainer_config,
        False,
        False,
        tanhresample,
        tanhresample,
    )

    # The module is constructed while the policy's graph is the default graph.
    with policy.graph.as_default():
        module = BCModule(
            policy,
            policy_learning_rate=trainer_config["learning_rate"],
            default_batch_size=trainer_config["batch_size"],
            default_num_epoch=3,
            **trainer_config["behavioral_cloning"],
        )

    # Outside of this helper the optimizer is normally the one that makes this
    # call, once the BCModule exists.
    # NOTE(review): placed after the graph context block -- confirm this
    # matches the intended layout.
    policy.initialize_or_load()
    return module