Code example #1
import logging
from typing import Optional

import tensorflow

# Note: the project-specific classes used below (PhyEnvironment, PhyExperiment, Config,
# LayerType, ProximalPolicyOptimization, PPOAgent) and the helpers _define_ppo_model,
# _define_agent and run_experiment are assumed to be imported from the surrounding project.
def run(workspace: str,
        iterations: int,
        render_training: bool, render_validation: bool, render_test: bool):
    # Define the logger
    logger: logging.Logger = logging.getLogger(__name__)
    logger.setLevel(logging.INFO)
    # Generate the Phy Environment
    environment: PhyEnvironment = PhyEnvironment("phy_environment", urllc_param=None, rl_env_ver='bernoulli', vision_ahead=6,
                                                 cw_tot_number=120, cw_class=None, q_norm=0)
    # Note: thresholds are set to trivially passable values so training can run
    # without having to reach a specific goal
    validation_threshold: float = 0.0
    validation_std: Optional[float] = None
    test_threshold: float = 0.0
    test_std: Optional[float] = None
    # Define Neural Network layers
    nn_config: Config = Config()
    nn_config.add_hidden_layer(LayerType.dense, [128, tensorflow.nn.relu, True, tensorflow.contrib.layers.xavier_initializer()], layer_name="dense_1")
    nn_config.add_hidden_layer(LayerType.dense, [64, tensorflow.nn.relu, True, tensorflow.contrib.layers.xavier_initializer()], layer_name="dense_2")
    nn_config.add_hidden_layer(LayerType.dense, [32, tensorflow.nn.relu, True, tensorflow.contrib.layers.xavier_initializer()], layer_name="dense_3")

    # Define model
    inner_model: ProximalPolicyOptimization = _define_ppo_model(actor_config=nn_config, critic_config=nn_config)
    # Define agent
    ppo_agent: PPOAgent = _define_agent(inner_model)
    # Define experiment
    experiment: PhyExperiment = PhyExperiment("phy_experiment-6",
                                              validation_threshold=validation_threshold, validation_std=validation_std,
                                              test_threshold=test_threshold, test_std=test_std,
                                              environment=environment, agent=ppo_agent)
    # Define experiment data
    saves_to_keep: int = 15
    plots_dpi: int = 150
    parallel: int = 10
    training_episodes: int = 2000
    validation_episodes: int = 100
    training_validation_volleys: int = 30
    test_episodes: int = 100
    test_volleys: int = 10
    episode_length_max: int = 140
    # Run experiment
    run_experiment(logger=logger, experiment=experiment,
                   file_name=__file__, workspace_path=workspace,
                   training_volleys_episodes=training_episodes, validation_volleys_episodes=validation_episodes,
                   training_validation_volleys=training_validation_volleys,
                   test_volleys_episodes=test_episodes, test_volleys=test_volleys,
                   episode_length=episode_length_max, parallel=parallel,
                   render_during_training=render_training, render_during_validation=render_validation,
                   render_during_test=render_test,
                   iterations=iterations, saves_to_keep=saves_to_keep, plots_dpi=plots_dpi)
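For context, here is a minimal, hypothetical command-line wrapper showing how this training entry point could be invoked; the argparse wiring and flag names are assumptions and are not part of the original source.

# Hypothetical CLI wrapper (not in the original source): maps command-line
# flags onto the parameters of run() defined above.
if __name__ == "__main__":
    import argparse

    arg_parser = argparse.ArgumentParser(description="Train a PPO agent on the Phy environment")
    arg_parser.add_argument("--workspace", type=str, required=True, help="workspace directory for saves and plots")
    arg_parser.add_argument("--iterations", type=int, default=1, help="number of experiment iterations")
    arg_parser.add_argument("--render-training", action="store_true")
    arg_parser.add_argument("--render-validation", action="store_true")
    arg_parser.add_argument("--render-test", action="store_true")
    args = arg_parser.parse_args()
    run(args.workspace, args.iterations,
        args.render_training, args.render_validation, args.render_test)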
Code example #2
import logging
import sys
from typing import Optional

# Note: the project-specific classes used below (PhyEnvironment, PhyExperiment,
# AggressiveAgent) and the helper _define_agent are assumed to be imported from
# the surrounding project.
def run(log_path: str, iteration: int, render: bool,
        urllc_param: float, cw_tot_number: int, cw_classes: list,
        csv_path: Optional[str] = None, episodes: int = 1000):
    # Define the logger
    logger: logging.Logger = logging.getLogger(__name__)
    logger.setLevel(logging.INFO)
    # Generate the Phy Environment
    # Note: strictly speaking, this should match the environment the model was trained in;
    # in practice it works anyway, as long as the action and state spaces are the same
    environment: PhyEnvironment = PhyEnvironment("phy_environment", rl_env_ver='bernoulli', render=render,
                                                 urllc_param=urllc_param, q_norm=0,
                                                 cw_tot_number=cw_tot_number, cw_class=cw_classes)
    # Note: these values should be set according to the environment the model is tested in
    validation_threshold: float = 0.0
    validation_std: Optional[float] = None
    test_threshold: float = 0.0
    test_std: Optional[float] = None
    # Define agent
    aggressive_agent: AggressiveAgent = _define_agent()
    # Define experiment
    experiment: PhyExperiment = PhyExperiment("aggressive",
                                              validation_threshold=validation_threshold, validation_std=validation_std,
                                              test_threshold=test_threshold, test_std=test_std,
                                              environment=environment, agent=aggressive_agent,
                                              csv_path=csv_path)
    # Define experiment data (episodes is taken from the function parameter)
    volleys: int = 1
    episode_length_max: int = 1400
    if experiment.setup(logger=logger, iteration=iteration):
        # Prepare the logger handlers
        logger.handlers = []
        # Generate a console and a file handler for the logger
        console_handler: logging.StreamHandler = logging.StreamHandler(sys.stdout)
        file_handler: logging.FileHandler = logging.FileHandler(log_path + "/info.log", "w+")
        # Set handlers properties
        console_handler.setLevel(logging.DEBUG)
        file_handler.setLevel(logging.DEBUG)
        formatter: logging.Formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")
        console_handler.setFormatter(formatter)
        file_handler.setFormatter(formatter)
        # Add the handlers to the logger
        logger.addHandler(console_handler)
        logger.addHandler(file_handler)
        # Run the test; restore_path is empty since the aggressive baseline has no trained model to restore
        experiment.test(logger=logger, episodes=episodes, volleys=volleys, episode_length=episode_length_max,
                        restore_path="", render=render)
Code example #3
import logging
import sys
from typing import Optional

import tensorflow

# Note: the project-specific classes used below (PhyEnvironment, PhyExperiment, Config,
# LayerType, ProximalPolicyOptimization, PPOAgent) and the helpers _define_ppo_model and
# _define_agent are assumed to be imported from the surrounding project.
def run(restore_path: str,
        log_path: str,
        iteration: int,
        render: bool,
        urllc_param: float,
        cw_tot_number: int,
        cw_classes: list,
        csv_path: Optional[str] = None,
        episodes: int = 1000):
    # Define the logger
    logger: logging.Logger = logging.getLogger(__name__)
    logger.setLevel(logging.INFO)
    # Generate the Phy Environment
    # Note: strictly speaking, this should match the environment the model was trained in;
    # in practice it works anyway, as long as the action and state spaces are the same
    environment: PhyEnvironment = PhyEnvironment("phy_environment",
                                                 rl_env_ver='bernoulli',
                                                 render=render,
                                                 urllc_param=urllc_param,
                                                 q_norm=0,
                                                 cw_tot_number=cw_tot_number,
                                                 cw_class=cw_classes)
    # Note: these values should be set according to the environment the model is tested in
    validation_threshold: float = 0.0
    validation_std: Optional[float] = None
    test_threshold: float = 0.0
    test_std: Optional[float] = None
    # Define Neural Network layers
    # Note: this is important, the architecture must match the loaded model
    # (the initializers are irrelevant when restoring)
    nn_config: Config = Config()
    nn_config.add_hidden_layer(LayerType.dense, [128, tensorflow.nn.relu, True, tensorflow.contrib.layers.xavier_initializer()], layer_name="dense_1")
    nn_config.add_hidden_layer(LayerType.dense, [64, tensorflow.nn.relu, True, tensorflow.contrib.layers.xavier_initializer()], layer_name="dense_2")
    nn_config.add_hidden_layer(LayerType.dense, [32, tensorflow.nn.relu, True, tensorflow.contrib.layers.xavier_initializer()], layer_name="dense_3")
    # Define model
    inner_model: ProximalPolicyOptimization = _define_ppo_model(
        actor_config=nn_config, critic_config=nn_config)
    # Define agent
    ppo_agent: PPOAgent = _define_agent(inner_model)
    # Define experiment
    experiment: PhyExperiment = PhyExperiment(
        "phy_experiment",
        validation_threshold=validation_threshold,
        validation_std=validation_std,
        test_threshold=test_threshold,
        test_std=test_std,
        environment=environment,
        agent=ppo_agent,
        csv_path=csv_path,
        model=iteration)
    # Define experiment data (episodes is taken from the function parameter)
    volleys: int = 1
    episode_length_max: int = 1400
    if experiment.setup(logger=logger, iteration=iteration):
        # Prepare the logger handlers
        logger.handlers = []
        # Generate a console and a file handler for the logger
        console_handler: logging.StreamHandler = logging.StreamHandler(
            sys.stdout)
        file_handler: logging.FileHandler = logging.FileHandler(
            log_path + "/info.log", "w+")
        # Set handlers properties
        console_handler.setLevel(logging.DEBUG)
        file_handler.setLevel(logging.DEBUG)
        formatter: logging.Formatter = logging.Formatter(
            "%(asctime)s - %(levelname)s - %(message)s")
        console_handler.setFormatter(formatter)
        file_handler.setFormatter(formatter)
        # Add the handlers to the logger
        logger.addHandler(console_handler)
        logger.addHandler(file_handler)
        # Run the test, restoring the trained model from restore_path
        experiment.test(logger=logger,
                        episodes=episodes,
                        volleys=volleys,
                        episode_length=episode_length_max,
                        restore_path=restore_path,
                        render=render)
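The trained-model counterpart differs from the baseline call above mainly in the restore_path, which must point at the checkpoint produced by the training script; a hypothetical, illustrative call follows.

# Hypothetical invocation (paths and values are illustrative, not from the original source)
if __name__ == "__main__":
    run(restore_path="./workspace/phy_experiment/saves",  # assumed checkpoint location
        log_path="./logs", iteration=0, render=False,
        urllc_param=0.5, cw_tot_number=120, cw_classes=None,
        csv_path="./results/ppo_test.csv", episodes=1000)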