def test_contains_correct_n_in_vector(self):
    """A vector holding n itself as one of its entries is still contained."""
    n_trials = 1
    n_categories = 2
    space = multinomial.Multinomial(n_categories, n_trials)

    # The single trial lands entirely in the first category, so one entry
    # equals n and the other is zero.
    candidate = np.asarray([1, 0], dtype=np.uint32)

    self.assertTrue(space.contains(candidate))
def test_one_hot_conversion(self):
    """With convert_one_hot_to_integer set, features come back as [5]."""
    # Multinomial(10, 1): 10 categories, a single trial.
    obs_space = gym.spaces.Dict({'x': multinomial.Multinomial(10, 1)})
    single_threshold = threshold_policies.ThresholdPolicy.SINGLE_THRESHOLD

    agent_params = classifier_agents.ScoringAgentParams(
        default_action_fn=lambda: 0,
        feature_keys=['x'],
        convert_one_hot_to_integer=True,
        threshold_policy=single_threshold)
    agent = classifier_agents.ThresholdAgent(
        observation_space=obs_space,
        reward_fn=rewards.NullReward(),
        params=agent_params)

    # NOTE(review): presumably _one_hot(5) is a vector whose hot entry sits at
    # index 5 -- confirm against the helper's definition.
    features = agent._get_features({'x': _one_hot(5)})
    self.assertEqual(features, [5])
def __init__(self, params=None):
    """Build the action space and observable state variables, then init state.

    Args:
      params: Environment parameters. When None, a default `Params()` is used.
        `prior_incident_counts` and `incident_rates` must each have
        `n_locations` entries.
    """
    if params is None:
        params = Params()

    n_locations = params.n_locations
    self.action_space = multinomial.Multinomial(
        n_locations, params.n_attention_units)

    # Per-location parameter vectors must line up with the location count.
    assert n_locations == len(params.prior_incident_counts)
    assert n_locations == len(params.incident_rates)

    # Define the observation space.
    # Crimes seen is multidiscrete because it may not sum to
    # n_attention_units. MultiDiscrete uses dtype=np.int32.
    int32_max = np.iinfo(np.int32).max
    if params.attention_replacement:
        # With attention replacement, the number of attention units does not
        # bound incidents_seen.
        seen_bound = int32_max
    else:
        seen_bound = params.n_attention_units + 1
    incidents_seen_space = spaces.MultiDiscrete([seen_bound] * n_locations)
    incidents_reported_space = spaces.MultiDiscrete(
        [int32_max] * n_locations)

    n_features = len(params.feature_means)
    location_features_space = spaces.Box(
        low=-np.inf,
        high=np.inf,
        shape=(n_locations, n_features),
        dtype=np.float32)

    # The first observation from this state is not necessarily contained by
    # this observation space. It conveys a prior of the initial incident
    # counts.
    self.observable_state_vars = {
        'incidents_seen': incidents_seen_space,
        'incidents_reported': incidents_reported_space,
        'location_features': location_features_space,
    }

    super(LocationAllocationEnv, self).__init__(params)
    self._state_init()
def test_agent_on_one_hot_vectors(self):
    """Train a threshold agent on one-hot inputs and check its boundary at 3.

    Each step's reward labels the *previous* observation: it is 1 when the
    previous one-hot vector's hot index is >= 3, else 0. After training, the
    frozen agent is expected to accept index 3 and reject index 2.
    """
    # Space of 1-hot vectors of length 10.
    observation_space = gym.spaces.Dict(
        {'x': multinomial.Multinomial(10, 1)})

    params = classifier_agents.ScoringAgentParams(
        default_action_fn=lambda: 0,
        feature_keys=['x'],
        convert_one_hot_to_integer=True,
        burnin=999,
        threshold_policy=threshold_policies.ThresholdPolicy.SINGLE_THRESHOLD)

    agent = classifier_agents.ThresholdAgent(
        observation_space=observation_space,
        reward_fn=rewards.NullReward(),
        params=params)

    # Fixed seed so the sampled training sequence is reproducible.
    observation_space.seed(100)

    # Train a boundary at 3 using 1-hot vectors.
    observation = observation_space.sample()
    agent._act_impl(observation, reward=None, done=False)
    for _ in range(1000):
        last_observation = observation
        observation = observation_space.sample()
        agent._act_impl(
            observation,
            reward=int(np.argmax(last_observation['x']) >= 3),
            done=False)

        # Sanity-check the most recently recorded example: its label must
        # agree with whether its (integer) feature is at or above 3. Uses a
        # unittest assertion so the check survives `python -O` and reports
        # both values on failure.
        examples = agent._training_corpus.examples
        if examples:
            latest = examples[-1]
            self.assertEqual(int(latest.features[0] >= 3), latest.label)

    agent.frozen = True

    self.assertTrue(agent.act({'x': _one_hot(3)}, done=False))
    self.assertFalse(agent.act({'x': _one_hot(2)}, done=False))
def setUp(self):
    """Create the seeded Multinomial space used by the tests."""
    n_trials, n_categories = 15, 6
    self.n = n_trials
    self.k = n_categories

    self.multinomial_space = multinomial.Multinomial(self.k, self.n)
    # Seed the space with a fixed value (0).
    self.multinomial_space.seed(0)

    super(MultinomialTest, self).setUp()
def __init__(self, params=None):
    """Initialize the base environment, then add 'applicant_features'.

    Args:
      params: Environment parameters, forwarded unchanged to the parent
        constructor.
    """
    super(DelayedImpactEnv, self).__init__(params)

    # Applicant features are drawn from a single-trial Multinomial whose
    # category count is the applicant distribution's dimension.
    n_categories = self.initial_params.applicant_distribution.dim
    applicant_space = multinomial.Multinomial(n_categories, 1)
    self.observable_state_vars['applicant_features'] = applicant_space

    # Build the observation space only after the extra variable is registered.
    self.observation_space = spaces.Dict(self.observable_state_vars)