import logging
import os
import sys

import tensorflow

# Project modules (imported elsewhere in the repository) provide PhyEnvironment,
# PhyExperiment, Config, LayerType, the PPO/VPG models and agents, and the
# run_experiment / command_line_parse utilities.


def run(workspace: str, iterations: int, render_training: bool, render_validation: bool, render_test: bool):
    # Define the logger
    logger: logging.Logger = logging.getLogger(__name__)
    logger.setLevel(logging.INFO)
    # Generate the Phy Environment
    environment: PhyEnvironment = PhyEnvironment("phy_environment",
                                                 urllc_param=None, rl_env_ver='bernoulli',
                                                 vision_ahead=6, cw_tot_number=120,
                                                 cw_class=None, q_norm=0)
    # Note: thresholds are high so training can be performed without worrying about reaching a certain goal
    validation_threshold: float = 0.0
    validation_std: float or None = None
    test_threshold: float = 0.0
    test_std: float or None = None
    # Define Neural Network layers
    nn_config: Config = Config()
    nn_config.add_hidden_layer(LayerType.dense,
                               [128, tensorflow.nn.relu, True, tensorflow.contrib.layers.xavier_initializer()],
                               layer_name="dense_1")
    nn_config.add_hidden_layer(LayerType.dense,
                               [64, tensorflow.nn.relu, True, tensorflow.contrib.layers.xavier_initializer()],
                               layer_name="dense_2")
    nn_config.add_hidden_layer(LayerType.dense,
                               [32, tensorflow.nn.relu, True, tensorflow.contrib.layers.xavier_initializer()],
                               layer_name="dense_3")
    # Define model
    inner_model: ProximalPolicyOptimization = _define_ppo_model(actor_config=nn_config, critic_config=nn_config)
    # Define agent
    ppo_agent: PPOAgent = _define_agent(inner_model)
    # Define experiment
    experiment: PhyExperiment = PhyExperiment("phy_experiment-6",
                                              validation_threshold=validation_threshold,
                                              validation_std=validation_std,
                                              test_threshold=test_threshold,
                                              test_std=test_std,
                                              environment=environment,
                                              agent=ppo_agent)
    # Define experiment data
    saves_to_keep: int = 15
    plots_dpi: int = 150
    parallel: int = 10
    training_episodes: int = 2000
    validation_episodes: int = 100
    training_validation_volleys: int = 30
    test_episodes: int = 100
    test_volleys: int = 10
    episode_length_max: int = 140
    # Run experiment
    run_experiment(logger=logger,
                   experiment=experiment,
                   file_name=__file__,
                   workspace_path=workspace,
                   training_volleys_episodes=training_episodes,
                   validation_volleys_episodes=validation_episodes,
                   training_validation_volleys=training_validation_volleys,
                   test_volleys_episodes=test_episodes,
                   test_volleys=test_volleys,
                   episode_length=episode_length_max,
                   parallel=parallel,
                   render_during_training=render_training,
                   render_during_validation=render_validation,
                   render_during_test=render_test,
                   iterations=iterations,
                   saves_to_keep=saves_to_keep,
                   plots_dpi=plots_dpi)
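# Usage sketch, with illustrative values only: the workspace path and iteration count
# below are placeholder assumptions, not project defaults.
def _launch_training_example():
    run(workspace="./workspace/phy_ppo",
        iterations=1,
        render_training=False,
        render_validation=False,
        render_test=False)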
# Agent factory for the VPG experiment (function header reconstructed from the return statement below)
def _define_agent(model: VanillaPolicyGradient, updates_per_training_volley: int) -> VPGRNGDiscreteAgent:
    # Return the agent
    return VPGRNGDiscreteAgent("vpg_agent", model, updates_per_training_volley)


if __name__ == "__main__":
    # Parse the command line arguments
    workspace_path, experiment_iterations_number, cuda_devices, render_during_training, render_during_validation, render_during_test = command_line_parse()
    # Define the CUDA devices in which to run the experiment
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    os.environ["CUDA_VISIBLE_DEVICES"] = cuda_devices
    # Define the logger
    logger: logging.Logger = logging.getLogger(__name__)
    logger.setLevel(logging.INFO)
    # Define Neural Network layers
    nn_config: Config = Config()
    nn_config.add_hidden_layer(LayerType.dense,
                               [4096, tensorflow.nn.relu, True, tensorflow.contrib.layers.xavier_initializer()])
    nn_config.add_hidden_layer(LayerType.dense,
                               [4096, tensorflow.nn.relu, True, tensorflow.contrib.layers.xavier_initializer()])
    nn_config.add_hidden_layer(LayerType.dense,
                               [4096, tensorflow.nn.relu, True, tensorflow.contrib.layers.xavier_initializer()])
    # Define model
    inner_model: VanillaPolicyGradient = _define_vpg_model(nn_config)
    # Define agent
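# The command_line_parse helper used in the __main__ block above is defined elsewhere
# in the project; the function below is only a minimal argparse-based stand-in sketch,
# and the flag names and defaults are assumptions rather than the project's actual interface.
def _command_line_parse_sketch():
    import argparse
    parser = argparse.ArgumentParser(description="Run the experiment")
    parser.add_argument("workspace", type=str, help="workspace directory for saves and plots")
    parser.add_argument("iterations", type=int, help="number of experiment iterations")
    parser.add_argument("--cuda-devices", type=str, default="-1", help="value assigned to CUDA_VISIBLE_DEVICES")
    parser.add_argument("--render-training", action="store_true")
    parser.add_argument("--render-validation", action="store_true")
    parser.add_argument("--render-test", action="store_true")
    args = parser.parse_args()
    # Same tuple shape as the one unpacked from command_line_parse() above
    return (args.workspace, args.iterations, args.cuda_devices,
            args.render_training, args.render_validation, args.render_test)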
def run(restore_path: str, log_path: str, iteration: int, render: bool, urllc_param: float, cw_tot_number: int, cw_classes: list, csv_path: str = None, episodes: int = 1000):
    # Define the logger
    logger: logging.Logger = logging.getLogger(__name__)
    logger.setLevel(logging.INFO)
    # Generate the Phy Environment
    # Note: formally this should be changed each time to match the environment the model was trained in;
    # however it works anyway, as long as the action and state spaces are the same
    environment: PhyEnvironment = PhyEnvironment("phy_environment",
                                                 rl_env_ver='bernoulli', render=render,
                                                 urllc_param=urllc_param, q_norm=0,
                                                 cw_tot_number=cw_tot_number, cw_class=cw_classes)
    # Note: these values should be set according to the environment the model is tested in
    validation_threshold: float = 0.0
    validation_std: float or None = None
    test_threshold: float = 0.0
    test_std: float or None = None
    # Define Neural Network layers
    # Note: this is important, it should be the same as the loaded model (the initializers are not relevant)
    nn_config: Config = Config()
    nn_config.add_hidden_layer(LayerType.dense,
                               [128, tensorflow.nn.relu, True, tensorflow.contrib.layers.xavier_initializer()],
                               layer_name="dense_1")
    nn_config.add_hidden_layer(LayerType.dense,
                               [64, tensorflow.nn.relu, True, tensorflow.contrib.layers.xavier_initializer()],
                               layer_name="dense_2")
    nn_config.add_hidden_layer(LayerType.dense,
                               [32, tensorflow.nn.relu, True, tensorflow.contrib.layers.xavier_initializer()],
                               layer_name="dense_3")
    # Define model
    inner_model: ProximalPolicyOptimization = _define_ppo_model(actor_config=nn_config, critic_config=nn_config)
    # Define agent
    ppo_agent: PPOAgent = _define_agent(inner_model)
    # Define experiment
    experiment: PhyExperiment = PhyExperiment("phy_experiment",
                                              validation_threshold=validation_threshold,
                                              validation_std=validation_std,
                                              test_threshold=test_threshold,
                                              test_std=test_std,
                                              environment=environment,
                                              agent=ppo_agent,
                                              csv_path=csv_path,
                                              model=iteration)
    # Define experiment data
    # episodes: int = 100
    volleys: int = 1
    episode_length_max: int = 1400
    if experiment.setup(logger=logger, iteration=iteration):
        # Prepare the logger handlers
        logger.handlers = []
        # Generate a console and a file handler for the logger
        console_handler: logging.StreamHandler = logging.StreamHandler(sys.stdout)
        file_handler: logging.FileHandler = logging.FileHandler(log_path + "/info.log", "w+")
        # Set handlers properties
        console_handler.setLevel(logging.DEBUG)
        file_handler.setLevel(logging.DEBUG)
        formatter: logging.Formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")
        console_handler.setFormatter(formatter)
        file_handler.setFormatter(formatter)
        # Add the handlers to the logger
        logger.addHandler(console_handler)
        logger.addHandler(file_handler)
        # Actually test the model
        experiment.test(logger=logger,
                        episodes=episodes,
                        volleys=volleys,
                        episode_length=episode_length_max,
                        restore_path=restore_path,
                        render=render)
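# Usage sketch, with illustrative values only: restore the model saved at a given
# training iteration and benchmark it. Every value below (paths, iteration index,
# URLLC parameter, codeword configuration, episode count) is a placeholder assumption.
def _launch_benchmark_example():
    run(restore_path="./workspace/phy_ppo/saves",
        log_path="./workspace/phy_ppo/benchmark_logs",
        iteration=1,
        render=False,
        urllc_param=0.5,
        cw_tot_number=120,
        cw_classes=None,
        csv_path="./workspace/phy_ppo/benchmark_results.csv",
        episodes=1000)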