def act_inference(self,
                  logger: logging.Logger,
                  session,
                  interface: RNGDiscretePassThroughInterface,
                  agent_observation_current):
    # Predict the action with the model by sampling from its probability distribution
    action, _, _ = self._model.sample_action(
        session, agent_observation_current,
        interface.get_action_mask(logger, session))
    # Only the action is needed at inference time: the other returned values are discarded
    return action
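
For intuition, masked sampling from a discrete stochastic policy (which is what sample_action appears to do here, given the action-mask argument) can be sketched as follows. This is a minimal NumPy illustration under that assumption, not the library's actual implementation; the sample_masked_action name and its logits/mask arguments are hypothetical.

import numpy as np

def sample_masked_action(logits: np.ndarray, mask: np.ndarray) -> int:
    # Push the logits of invalid actions to -inf so they receive zero probability
    masked_logits = np.where(mask.astype(bool), logits, -np.inf)
    # Numerically stable softmax over the remaining valid actions
    exponentials = np.exp(masked_logits - masked_logits.max())
    probabilities = exponentials / exponentials.sum()
    # Draw an action index from the resulting categorical distribution
    return int(np.random.choice(len(probabilities), p=probabilities))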
Example #2
def act_train(self, logger: logging.Logger, session,
              interface: RNGDiscretePassThroughInterface,
              agent_observation_current):
    # Without an explicit exploration policy, use the model's own prediction:
    # a sample from its probability distribution, which is still inherently exploratory
    best_action, self._current_value_estimate = self._model.sample_action(
        session, agent_observation_current,
        interface.get_action_mask(logger, session))
    # Return the exploration action
    return best_action
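
The value estimate stored in self._current_value_estimate is typically consumed later when computing advantages; the run script below describes the agent as Vanilla Policy Gradient with a GAE buffer. A minimal sketch of Generalized Advantage Estimation over one finished episode, assuming per-step rewards and value estimates have already been collected (the function and argument names are illustrative, not the library's API):

import numpy as np

def gae_advantages(rewards: np.ndarray, values: np.ndarray, last_value: float,
                   gamma: float = 0.99, lam: float = 0.95) -> np.ndarray:
    # Bootstrap with the value estimate of the state following the last collected step
    values_extended = np.append(values, last_value)
    # One-step temporal-difference errors: delta_t = r_t + gamma * V(s_t+1) - V(s_t)
    deltas = rewards + gamma * values_extended[1:] - values_extended[:-1]
    # Discounted backward sum: A_t = sum over l of (gamma * lambda)^l * delta_(t+l)
    advantages = np.zeros(len(rewards), dtype=np.float64)
    running_sum = 0.0
    for step in reversed(range(len(rewards))):
        running_sum = deltas[step] + gamma * lam * running_sum
        advantages[step] = running_sum
    return advantages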
Example #3
    environment_name: str = 'RNGDiscreteBinary'
    # Generate RNG discrete environment
    sequence_size: int = 50
    range_min: int = -128
    range_max: int = 127
    acceptance_value: float = math.inf
    seed_range_min: int = 0
    seed_range_max: int = 0
    episode_length: int = 100
    threshold_value: float = 0.2
    environment: RNGDiscreteBinaryEpisodicEnvironment = RNGDiscreteBinaryEpisodicEnvironment(
        environment_name, sequence_size, range_min, range_max, seed_range_min,
        seed_range_max, acceptance_value, episode_length, threshold_value)

    # Define interfaces
    interface: RNGDiscretePassThroughInterface = RNGDiscretePassThroughInterface(
        environment)
    # Define experiments
    success_threshold: float = 0.35
    experiment: RNGDiscreteEpisodicExperiment = RNGDiscreteEpisodicExperiment(
        "experiment", success_threshold, environment, vpg_agent, interface)
    # Define experiment data
    testing_episodes: int = 100
    test_cycles: int = 10
    training_episodes: int = 100
    validation_episodes: int = 100
    max_training_episodes: int = 35000
    episode_length_max: int = 100
    plot_sample_density: int = 10
    # Run experiment
    intro: str = "Data:\n" \
                 "\nVanilla Policy Gradient with GAE buffer" \