Example #1
def _define_model(config: Config) -> VanillaPolicyGradient:
    # Define attributes
    # ... (hyperparameter attribute definitions omitted) ...
    # Return the model
    return VanillaPolicyGradient("model", discount_factor,
                                 learning_rate_policy, learning_rate_advantage,
                                 value_steps_per_update, config,
                                 lambda_parameter)


def _define_agent(model: VanillaPolicyGradient) -> VPGRNGDiscreteAgent:
    # Define attributes
    updates_per_training_volley: int = 2
    # Return the agent
    return VPGRNGDiscreteAgent("vpg_agent", model, updates_per_training_volley)


if __name__ == "__main__":
    # Parse the command line arguments
    workspace_path, experiment_iterations_number, cuda_devices, render_during_training, render_during_validation, render_during_test = command_line_parse()
    # Define the CUDA devices in which to run the experiment
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    os.environ["CUDA_VISIBLE_DEVICES"] = cuda_devices
    # Define the logger
    logger: logging.Logger = logging.getLogger(__name__)
    logger.setLevel(logging.INFO)
    # Define Neural Network layers
    nn_config: Config = Config()
    nn_config.add_hidden_layer(LayerType.dense, [
        4096, tensorflow.nn.relu, True,
        tensorflow.contrib.layers.xavier_initializer()
    ])
    nn_config.add_hidden_layer(LayerType.dense, [
        4096, tensorflow.nn.relu, True,
        tensorflow.contrib.layers.xavier_initializer()
    ])
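
For reference, the two hidden layers configured above (4096 units, ReLU activation, bias enabled, Xavier initialization) correspond roughly to the following plain tensorflow.keras stack. This is only an illustrative sketch of the architecture being described, not the library's own graph-building code, and the input size is a placeholder:

import tensorflow

def build_hidden_stack(input_size: int) -> tensorflow.keras.Model:
    # Two dense hidden layers of 4096 units each, ReLU activation,
    # bias enabled and Xavier (Glorot uniform) weight initialization
    inputs = tensorflow.keras.Input(shape=(input_size,))
    hidden = tensorflow.keras.layers.Dense(4096, activation="relu", use_bias=True,
                                           kernel_initializer="glorot_uniform")(inputs)
    hidden = tensorflow.keras.layers.Dense(4096, activation="relu", use_bias=True,
                                           kernel_initializer="glorot_uniform")(hidden)
    return tensorflow.keras.Model(inputs=inputs, outputs=hidden)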

Example #2

def _define_epsilon_greedy_explorer() -> EpsilonGreedyExplorationPolicy:
    # Define attributes
    # ... (exploration_rate_max and exploration_rate_min definitions omitted) ...
    exploration_rate_decay: float = 0.00002
    # Return the explorer
    return EpsilonGreedyExplorationPolicy(exploration_rate_max, exploration_rate_min, exploration_rate_decay)


def _define_epsilon_greedy_agent(model: DuelingDeepQLearning, exploration_policy: EpsilonGreedyExplorationPolicy) -> DDDQLTicTacToeAgent:
    # Define attributes
    weight_copy_step_interval: int = 100
    batch_size: int = 150
    # Return the agent
    return DDDQLTicTacToeAgent("dddqn_egreedy_agent", model, exploration_policy, weight_copy_step_interval, batch_size)


if __name__ == "__main__":
    # Parse the command line arguments
    checkpoint_path, iteration_number, cuda_devices, render = command_line_parse(True)
    # Define the CUDA devices in which to run the experiment
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    os.environ["CUDA_VISIBLE_DEVICES"] = cuda_devices
    # Define the logger
    logger: logging.Logger = logging.getLogger(__name__)
    logger.setLevel(logging.INFO)
    # Tic Tac Toe random environment:
    #       - the success threshold for considering both the training completed and the experiment successful is around 95% of matches won by the agent (depending on the reward assigned)
    environment_name: str = 'TicTacToeRandom'
    # Generate the Tic Tac Toe environment with a random environment player, using the O player as the environment player, with the low reward type only
    environment_low_reward: TicTacToeEnvironmentRandom = TicTacToeEnvironmentRandom(environment_name, Player.o,
                                                                                    1.0, -0.1, 0.0)
    # Define Neural Network layers
    nn_config: Config = Config()
    nn_config.add_hidden_layer(LayerType.dense, [1024, tensorflow.nn.relu, True, tensorflow.contrib.layers.xavier_initializer()])
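
As a side note, the epsilon-greedy exploration policy built in this example from a maximum rate, a minimum rate and a decay value typically behaves along the lines of the sketch below. This is a generic illustration rather than the library's implementation; the exponential decay schedule and the default rate values are assumptions, with only the decay of 0.00002 taken from the snippet above:

import math
import random

def exploration_rate(step: int, rate_max: float, rate_min: float, decay: float) -> float:
    # Anneal the exploration rate from rate_max towards rate_min as steps increase
    return rate_min + (rate_max - rate_min) * math.exp(-decay * step)

def epsilon_greedy_action(q_values, step: int,
                          rate_max: float = 1.0, rate_min: float = 0.001,
                          decay: float = 0.00002) -> int:
    # With probability epsilon choose a random action, otherwise the greedy one
    epsilon = exploration_rate(step, rate_max, rate_min, decay)
    if random.random() < epsilon:
        return random.randrange(len(q_values))
    return max(range(len(q_values)), key=lambda action: q_values[action])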