def test_contains_correct_n_in_vector(self):
    """A vector with `n` as one of its entries is still inside the space.

    Uses a space with 2 categories and 1 trial, and checks that the vector
    [1, 0], whose first entry equals the number of trials, is accepted by
    `contains`.
    """
    num_categories = 2
    num_trials = 1
    space = multinomial.Multinomial(num_categories, num_trials)
    # The first entry equals the number of trials.
    candidate = np.asarray([1, 0], dtype=np.uint32)
    self.assertTrue(space.contains(candidate))
    def test_one_hot_conversion(self):
        """Checks that a one-hot feature is reported as a single integer.

        A ThresholdAgent is built with `convert_one_hot_to_integer=True` over
        a Dict observation space whose only key, 'x', is a Multinomial(10, 1)
        space. Its `_get_features` output for `_one_hot(5)` must equal [5].
        """
        one_hot_space = multinomial.Multinomial(10, 1)
        obs_space = gym.spaces.Dict({'x': one_hot_space})

        agent_params = classifier_agents.ScoringAgentParams(
            default_action_fn=lambda: 0,
            feature_keys=['x'],
            convert_one_hot_to_integer=True,
            threshold_policy=(
                threshold_policies.ThresholdPolicy.SINGLE_THRESHOLD))

        threshold_agent = classifier_agents.ThresholdAgent(
            observation_space=obs_space,
            reward_fn=rewards.NullReward(),
            params=agent_params)

        extracted = threshold_agent._get_features({'x': _one_hot(5)})
        self.assertEqual(extracted, [5])
Esempio n. 3
0
    def __init__(self, params=None):
        """Sets up the action space and the observable state spaces.

        Args:
          params: Environment parameters. When None, a default `Params()`
            instance is created and used.

        Raises:
          AssertionError: If `params.prior_incident_counts` or
            `params.incident_rates` does not have `params.n_locations`
            entries.
        """
        params = Params() if params is None else params
        n_locations = params.n_locations

        self.action_space = multinomial.Multinomial(
            n_locations, params.n_attention_units)

        assert (len(params.prior_incident_counts) == n_locations
                == len(params.incident_rates))

        # Observation space definition.
        # Incidents seen are modelled as MultiDiscrete (dtype=np.int32)
        # because they need not sum to n_attention_units.
        int32_max = np.iinfo(np.int32).max
        # With attention replacement the number of attention units does not
        # bound the incidents seen, so the largest int32 is used instead.
        seen_bound = (int32_max if params.attention_replacement
                      else params.n_attention_units + 1)
        seen_space = spaces.MultiDiscrete([seen_bound] * n_locations)

        reported_space = spaces.MultiDiscrete([int32_max] * n_locations)

        features_shape = (n_locations, len(params.feature_means))
        features_space = spaces.Box(
            low=-np.inf, high=np.inf, shape=features_shape, dtype=np.float32)

        # Note: the very first observation is not guaranteed to lie in this
        # observation space, since it conveys a prior of the initial incident
        # counts.
        self.observable_state_vars = {
            'incidents_seen': seen_space,
            'incidents_reported': reported_space,
            'location_features': features_space,
        }

        super(LocationAllocationEnv, self).__init__(params)
        self._state_init()
    def test_agent_on_one_hot_vectors(self):
        """Trains a ThresholdAgent on one-hot observations with a boundary at 3.

        Observations are sampled from a seeded Dict space whose key 'x' is a
        Multinomial(10, 1) space. Each call to `_act_impl` passes a reward of
        1 when the hot index of the *previous* observation is >= 3 and 0
        otherwise. While training, the newest example in the agent's training
        corpus must carry a label consistent with its integer feature. After
        freezing, the agent must act truthy on index 3 and falsy on index 2.
        """
        # Space of 1-hot vectors of length 10.
        observation_space = gym.spaces.Dict(
            {'x': multinomial.Multinomial(10, 1)})

        params = classifier_agents.ScoringAgentParams(
            default_action_fn=lambda: 0,
            feature_keys=['x'],
            convert_one_hot_to_integer=True,
            burnin=999,
            threshold_policy=(
                threshold_policies.ThresholdPolicy.SINGLE_THRESHOLD))

        agent = classifier_agents.ThresholdAgent(
            observation_space=observation_space,
            reward_fn=rewards.NullReward(),
            params=params)

        observation_space.seed(100)
        # Train a boundary at 3 using 1-hot vectors.
        observation = observation_space.sample()
        agent._act_impl(observation, reward=None, done=False)
        for _ in range(1000):
            last_observation = observation
            observation = observation_space.sample()
            # The reward is computed from the previous observation.
            agent._act_impl(observation,
                            reward=int(np.argmax(last_observation['x']) >= 3),
                            done=False)
            examples = agent._training_corpus.examples
            if examples:
                newest = examples[-1]
                # Use a unittest assertion rather than a bare `assert`: it is
                # not stripped under `python -O` and reports both values on
                # failure.
                self.assertEqual(int(newest.features[0] >= 3), newest.label)

        agent.frozen = True

        self.assertTrue(agent.act({'x': _one_hot(3)}, done=False))
        self.assertFalse(agent.act({'x': _one_hot(2)}, done=False))
 def setUp(self):
     """Creates a Multinomial space seeded with 0 for use by the tests.

     Stores the number of trials in `self.n` (15), the number of categories
     in `self.k` (6) and the space itself in `self.multinomial_space`, then
     delegates to the parent class's `setUp`.
     """
     self.n, self.k = 15, 6  # (trials, categories)
     space = multinomial.Multinomial(self.k, self.n)
     space.seed(0)
     self.multinomial_space = space
     super(MultinomialTest, self).setUp()
Esempio n. 6
0
 def __init__(self, params=None):
     """Initialises the environment and registers the applicant feature space.

     After the parent constructor runs, a Multinomial space sized by
     `self.initial_params.applicant_distribution.dim` (with 1 trial) is
     stored under 'applicant_features' in `self.observable_state_vars`, and
     `self.observation_space` is built as a Dict over those variables.

     Args:
       params: Environment parameters, forwarded unchanged to the parent
         constructor.
     """
     super(DelayedImpactEnv, self).__init__(params)
     feature_dim = self.initial_params.applicant_distribution.dim
     applicant_space = multinomial.Multinomial(feature_dim, 1)
     self.observable_state_vars['applicant_features'] = applicant_space
     self.observation_space = spaces.Dict(self.observable_state_vars)