def initial_state(self):
  """Samples the initial corpus state.

  Returns:
    A `Value` with the fields `doc_id`, `doc_topic`, `doc_quality`,
    `doc_features` and `doc_length`.
  """
  # The first 70% of the topics are "trashy"; the remainder are "nutritious".
  trashy_count = int(self._num_topics * 0.7)
  nutritious_count = self._num_topics - trashy_count
  # Mean quality climbs from the minimum utility up to 0 across the trashy
  # topics, then from 0 up to the maximum utility across the nutritious ones.
  topic_quality_means = tf.concat(
      [
          tf.linspace(self._topic_min_utility, 0., trashy_count),
          tf.linspace(0., self._topic_max_utility, nutritious_count),
      ],
      axis=0)
  # All-zero logits give every topic the same probability for every doc.
  uniform_logits = tf.zeros((self._num_docs, self._num_topics))
  doc_topic = ed.Categorical(logits=uniform_logits, dtype=tf.int32)
  # Every document uses the same fixed spread around its topic's mean quality.
  # NOTE(review): the value is passed as the Normal `scale` argument.
  quality_scale = 0.1
  doc_quality = ed.Normal(
      loc=tf.gather(topic_quality_means, doc_topic), scale=quality_scale)
  # Noisy one-hot encoding of each document's topic.
  doc_features = ed.Normal(
      loc=tf.one_hot(doc_topic, depth=self._num_topics), scale=0.7)
  # All videos have the same length.
  doc_length = ed.Deterministic(
      loc=tf.ones((self._num_docs,)) * self._video_length)
  # doc_id=0 is reserved for the "null" doc, so real ids start at 1.
  doc_id = ed.Deterministic(
      loc=tf.range(start=1, limit=self._num_docs + 1, dtype=tf.int32))
  return Value(
      doc_id=doc_id,
      doc_topic=doc_topic,
      doc_quality=doc_quality,
      doc_features=doc_features,
      doc_length=doc_length)
def next_metrics(self, previous_metrics, corpus_state, user_state,
                 user_response, slate_doc):
  """Builds the metrics for the current step from the user's utilities.

  Args:
    previous_metrics: metrics `Value` of the previous step; its
      "cumulative_reward" field is read.
    corpus_state: unused.
    user_state: user state `Value`; its "utilities" field is the reward.
    user_response: unused.
    slate_doc: unused.

  Returns:
    A `Value` with the fields `reward` and `cumulative_reward`.
  """
  del corpus_state, user_response, slate_doc  # Unused.
  step_reward = user_state.get("utilities")
  running_total = previous_metrics.get("cumulative_reward") + step_reward
  return Value(
      reward=ed.Deterministic(loc=step_reward),
      cumulative_reward=ed.Deterministic(loc=running_total))
def test_no_choice(self):
  """Checks the MNL choice when the no-choice logit dominates the slate."""
  # Two users, each with a no-choice logit of 10 against all-zero doc logits.
  nochoice_logits = 10.0 * tf.ones(2)
  mnl_model = selectors.MultinomialLogitChoiceModel((2, ), nochoice_logits)
  zero_logits = np.array([[0., 0.], [0., 0.]], dtype=np.float32)
  slate_document_logits = ed.Deterministic(loc=zero_logits)
  # Index 2 lies past the two slate positions; it is expected for both users.
  expected = ed.Deterministic(loc=np.array([2, 2], dtype=np.int32))
  actual = mnl_model.choice(slate_document_logits)
  expected_choice = self.evaluate(expected)
  actual_choice = self.evaluate(actual.get('choice'))
  self.assertAllEqual(expected_choice, actual_choice)
def next_metrics(self, previous_metrics, corpus_state, user_state,
                 user_response, slate_doc):
  """Computes the per-step reward and adds it to the running total.

  Args:
    previous_metrics: metrics `Value` of the previous step; its
      "cumulative_reward" field is read.
    corpus_state: unused.
    user_state: unused.
    user_response: response `Value`; its "consumed_time" field is read.
    slate_doc: unused.

  Returns:
    A `Value` with the fields `reward` (consumed time clipped below at zero)
    and `cumulative_reward` (previous cumulative reward plus `reward`).
  """
  del corpus_state, user_state, slate_doc  # Unused.
  # consumed_time will be -1 for unclicked slates, so clip it at zero.
  # `np.inf` is used because the capitalised alias `np.Inf` was removed in
  # NumPy 2.0 and raises AttributeError there.
  reward = tf.clip_by_value(user_response.get("consumed_time"), 0.0, np.inf)
  return Value(
      reward=ed.Deterministic(loc=reward),
      cumulative_reward=ed.Deterministic(
          loc=previous_metrics.get("cumulative_reward") + reward))
def test_choice(self):
  """Checks the iterated MNL choices when no-choice is impossible.

  The no-choice logits are negative infinity, and each slate row has one
  logit of 10 against one of 0; the expected choices are listed below.
  """
  # `np.inf` is used because the capitalised alias `np.Inf` was removed in
  # NumPy 2.0 and raises AttributeError there.
  imnl_model = selectors.IteratedMultinomialLogitChoiceModel(
      2, (2, 2), -np.inf * tf.ones((2, 2)))
  slate_document_logits = ed.Deterministic(
      loc=np.array([[[10., 0.], [0., 10.]], [[10., 0.], [0., 10.]]],
                   dtype=np.float32))
  expected = ed.Deterministic(
      loc=np.array([[[0, 1], [1, 0]], [[0, 1], [1, 0]]], dtype=np.int32))
  actual = imnl_model.choice(slate_document_logits)
  self.assertAllEqual(self.evaluate(expected),
                      self.evaluate(actual.get('choice')))
def test_available_documents(self):
  """Checks the documents the corpus exposes for a given click-count state."""
  click_counts = np.array([0, 100, 0], dtype=np.float32)
  corpus_state = Value(
      provider_click_count=ed.Deterministic(loc=click_counts))
  # Every document is expected to carry provider id 1 and all-zero features.
  expected_provider_id = ed.Deterministic(
      loc=tf.ones((self._num_docs, ), dtype=tf.int32))
  expected_doc_features = ed.Deterministic(
      loc=tf.zeros((self._num_docs, self._num_topics), dtype=tf.float32))
  expected = {
      'provider_id': expected_provider_id,
      'doc_features': expected_doc_features,
  }
  actual = self._video_corpus.available_documents(corpus_state)
  self.assertAllClose(
      self.evaluate(expected), self.evaluate(actual.as_dict))
def test_dot(self):
  """Checks 'dot' affinities between users and their slate documents."""
  # Four users, two topics, and a slate size of two.
  similarity_model = affinities.TargetPointSimilarity((4, ), 2, 'dot')
  interests = np.array(
      [[-4, 3], [-4, -3], [4, 3], [4, -3]], dtype=np.float32)
  # First slot: twice the user's interest vector; second slot: all zeros.
  slates = np.array(
      [[[-8, 6], [0, 0]],
       [[-8, -6], [0, 0]],
       [[8, 6], [0, 0]],
       [[8, -6], [0, 0]]],
      dtype=np.float32)
  # E.g. (-4, 3) . (-8, 6) = 32 + 18 = 50; the zero document scores 0.
  wanted = np.array(
      [[50, 0.], [50, 0.], [50, 0.], [50, 0.]], dtype=np.float32)
  user_interests = ed.Deterministic(loc=interests)
  slate_docs = ed.Deterministic(loc=slates)
  expected = ed.Deterministic(loc=wanted)
  actual = similarity_model.affinities(user_interests, slate_docs)
  self.assertAllClose(
      self.evaluate(expected), self.evaluate(actual.get('affinities')))
def test_negative_cosine(self):
  """Checks 'negative_cosine' affinities of every user against every doc."""
  # Four users, two topics, and five documents.
  similarity_model = affinities.TargetPointSimilarity(
      (4, ), 5, 'negative_cosine')
  interests = np.array(
      [[-4, 3], [-4, -3], [4, 3], [4, -3]], dtype=np.float32)
  docs = np.array(
      [[-8, 6], [-8, -6], [8, 6], [8, -6], [0, 0]], dtype=np.float32)
  # One row per user and one column per document; the zero doc scores 0.
  wanted = np.array(
      [[1., 0.28, -0.28, -1., 0.],
       [0.28, 1., -1., -0.28, 0.],
       [-0.28, -1., 1., 0.28, 0.],
       [-1., -0.28, 0.28, 1., 0.]],
      dtype=np.float32)
  user_interests = ed.Deterministic(loc=interests)
  documents = ed.Deterministic(loc=docs)
  expected = ed.Deterministic(loc=wanted)
  actual = similarity_model.affinities(user_interests, documents)
  self.assertAllClose(
      self.evaluate(expected), self.evaluate(actual.get('affinities')))
def test_states(self):
  """Tests two consecutive state updates of the recommender with a mock model."""
  self._recommender = cf_recommender.CollabFilteringRecommender(
      self._config, model_ctor=functools.partial(MockModel, model_output={}))
  init_state = self._recommender.initial_state()
  # The same two-document slates are shown to the three users in both steps.
  mock_slate_docs = Value(
      doc_id=tf.constant([[2, 3], [4, 5], [6, 7]], dtype=tf.int32))

  # Step 1: every user consumes a document (slate positions 0, 1 and 0).
  first_choice = tf.constant([0, 1, 0], dtype=tf.int32)
  first_ctime = tf.constant([2.2, 4.4, 1.1], dtype=tf.float32)
  mock_user_response = Value(
      choice=ed.Deterministic(loc=first_choice),
      consumed_time=ed.Deterministic(loc=first_ctime))
  next_state = self._recommender.next_state(init_state, mock_user_response,
                                            mock_slate_docs)
  first_state_dict = self.evaluate(next_state.as_dict)
  # The chosen doc ids and consumed times land in the last history slot.
  self.assertAllEqual(
      [[0, 0, 0, 0, 2], [0, 0, 0, 0, 5], [0, 0, 0, 0, 6]],
      first_state_dict['doc_history.state'])
  self.assertAllClose(
      [[0., 0., 0., 0., 2.2], [0., 0., 0., 0., 4.4], [0., 0., 0., 0., 1.1]],
      first_state_dict['ctime_history.state'])

  # Step 2: the first and third users pick index 2, i.e. they consume no
  # document (equivalent to consuming the 'null' document); only the second
  # user consumes one.
  second_choice = tf.constant([2, 0, 2], dtype=tf.int32)
  second_ctime = tf.constant([-1., 5.5, -1.], dtype=tf.float32)
  mock_user_response = Value(
      choice=ed.Deterministic(loc=second_choice),
      consumed_time=ed.Deterministic(loc=second_ctime))
  next_state = self._recommender.next_state(next_state, mock_user_response,
                                            mock_slate_docs)
  # A user's history is not updated when that user consumed no document.
  next_state_dict = self.evaluate(next_state.as_dict)
  self.assertAllEqual(
      [[0, 0, 0, 0, 2], [0, 0, 0, 5, 4], [0, 0, 0, 0, 6]],
      next_state_dict['doc_history.state'])
  self.assertAllClose(
      [[0., 0., 0., 0., 2.2], [0., 0., 0., 4.4, 5.5], [0., 0., 0., 0., 1.1]],
      next_state_dict['ctime_history.state'])
def test_single_peaked(self):
  """Checks 'single_peaked' affinities for explicitly supplied peaks."""
  similarity_model = affinities.TargetPointSimilarity(
      (4, ), 2, 'single_peaked')
  interests = np.array(
      [[-4, 3], [-4, -3], [4, 3], [4, -3]], dtype=np.float32)
  slates = np.array(
      [[[-8, 6], [0, 0]],
       [[-8, -6], [0, 0]],
       [[8, 6], [0, 0]],
       [[8, -6], [0, 0]]],
      dtype=np.float32)
  wanted = np.array(
      [[50, 0.], [14, 0.], [-14, 0.], [-50, 0.]], dtype=np.float32)
  # One peak value per user and topic.
  peaks = tf.constant([[32., 32.], [32., 0.], [0., 32.], [0., 0.]])
  user_interests = ed.Deterministic(loc=interests)
  slate_docs = ed.Deterministic(loc=slates)
  expected = ed.Deterministic(loc=wanted)
  actual = similarity_model.affinities(
      user_interests, slate_docs, affinity_peaks=peaks)
  self.assertAllClose(
      self.evaluate(expected), self.evaluate(actual.get('affinities')))
def test_slate_docs(self):
  """Checks the scores and the slate the recommender builds for three users."""
  interests = [[0., 1.], [1., 0.], [1., 1.]]
  user_obs = Value(user_interests=ed.Deterministic(loc=interests))
  # Four candidate documents that all belong to provider 0.
  provider_ids = np.array([0, 0, 0, 0], dtype=np.int32)
  features = [[0., 0.], [1., 1.], [0., 1.], [1., 0.]]
  available_docs = Value(
      provider_id=ed.Deterministic(loc=provider_ids),
      doc_features=ed.Deterministic(loc=features))
  # One score row per user; the slate holds the two highest-scoring documents
  # of each row, best first.
  expected_scores = [
      [1.0000043, 1.0000001, 2.0000086, 0.58579284],
      [1.000009, 1.0000083, 0.5857929, 2.0000098],
      [0.58579123, 2.0000002, 1.0000033, 1.0000091],
  ]
  expected_features = [
      [[0., 1.], [0., 0.]],
      [[1., 0.], [0., 0.]],
      [[1., 1.], [1., 0.]],
  ]
  expected = {
      'doc_scores': expected_scores,
      'provider_id': [[0, 0], [0, 0], [0, 0]],
      'doc_features': expected_features,
  }
  recommender_state = self._recommender.initial_state()
  actual = self._recommender.slate_docs(recommender_state, user_obs,
                                        available_docs)
  self.assertAllClose(expected, self.evaluate(actual.as_dict))
def test_response(self):
  """Checks the choices the user makes on a slate of all-zero documents."""
  zero_features = tf.zeros(
      (self._num_users, self._slate_size, self._num_topics))
  slate_docs = Value(features=ed.Deterministic(loc=zero_features))
  user_state = self._user.initial_state()
  response_value = self._user.next_response(user_state, slate_docs)
  response = self.evaluate(response_value.as_dict)
  # NOTE(review): these expected indices presumably depend on the choice
  # model and seed configured in the test fixture -- confirm against setUp.
  self.assertAllClose(response['choice'], [2, 1, 2, 1])
def test_inverse_euclidean(self):
  """Checks 'inverse_euclidean' affinities of every user against every doc."""
  # Four users, two topics, and five documents.
  similarity_model = affinities.TargetPointSimilarity(
      (4, ), 5, 'inverse_euclidean')
  interests = np.array(
      [[-4, 3], [-4, -3], [4, 3], [4, -3]], dtype=np.float32)
  docs = np.array(
      [[-8, 6], [-8, -6], [8, 6], [8, -6], [0, 0]], dtype=np.float32)
  # Each entry is 1 / distance between user and document; the distances that
  # occur are 5, sqrt(97), sqrt(153) and 15.
  inv_5 = .2
  inv_sqrt_97 = np.sqrt(1. / 97)
  inv_sqrt_153 = np.sqrt(1. / 153)
  inv_15 = 1. / 15
  wanted = np.array(
      [[inv_5, inv_sqrt_97, inv_sqrt_153, inv_15, inv_5],
       [inv_sqrt_97, inv_5, inv_15, inv_sqrt_153, inv_5],
       [inv_sqrt_153, inv_15, inv_5, inv_sqrt_97, inv_5],
       [inv_15, inv_sqrt_153, inv_sqrt_97, inv_5, inv_5]],
      dtype=np.float32)
  user_interests = ed.Deterministic(loc=interests)
  documents = ed.Deterministic(loc=docs)
  expected = ed.Deterministic(loc=wanted)
  actual = similarity_model.affinities(user_interests, documents)
  self.assertAllClose(
      self.evaluate(expected), self.evaluate(actual.get('affinities')))
def dis_test_real_slate_docs(self):
  """Checks keys and shapes of a slate built by the real (non-mock) model.

  NOTE(review): the `dis_` prefix presumably keeps the test runner from
  collecting this method -- confirm whether it is meant to stay disabled.
  """
  # Upscale the parameters in this test and override the default test setup.
  self._num_users = 50
  self._num_docs = 100
  self._num_topics = 20
  self._slate_size = 5
  self._config = {
      'history_length': 5,
      'num_users': self._num_users,
      'num_docs': self._num_docs,
      'num_topics': self._num_topics,
      'slate_size': self._slate_size,
  }
  num_users = self._config['num_users']
  num_docs = self._config['num_docs']
  num_topics = self._config['num_topics']
  slate_size = self._config['slate_size']

  # Random recommender state: five history slots per user.
  random_doc_ids = tf.random.uniform(
      shape=[num_users, 5], minval=0, maxval=num_docs, dtype=tf.int32)
  doc_state = Value(
      state=ed.Deterministic(loc=random_doc_ids)).prefixed_with('doc_history')
  random_ctimes = tf.random.uniform(
      shape=[num_users, 5], minval=0.0, maxval=1.0, dtype=tf.float32)
  consumption_state = Value(
      state=ed.Deterministic(loc=random_ctimes)).prefixed_with('ctime_history')

  # Candidate pool; doc ids start at 1.
  uniform_features = (
      tf.ones((self._num_docs, self._num_topics)) * 1.0 / self._num_topics)
  available_docs = Value(
      doc_id=ed.Deterministic(
          loc=tf.range(start=1, limit=num_docs + 1, dtype=tf.int32)),
      doc_topic=ed.Deterministic(loc=tf.ones((self._num_docs, ))),
      doc_quality=ed.Normal(loc=tf.zeros((num_docs, )), scale=0.1),
      doc_features=ed.Deterministic(loc=uniform_features),
      doc_length=ed.Deterministic(loc=tf.ones((self._num_docs, ))))

  self._recommender = cf_recommender.CollabFilteringRecommender(self._config)
  recommender_state = doc_state.union(consumption_state)
  slate_docs = self.evaluate(
      self._recommender.slate_docs(recommender_state, {},
                                   available_docs).as_dict)

  # Verify all the shapes and presented keys.
  per_slot_shape = [num_users, slate_size]
  expected_shapes = [
      ('doc_id', per_slot_shape),
      ('doc_topic', per_slot_shape),
      ('doc_quality', per_slot_shape),
      ('doc_features', [num_users, slate_size, num_topics]),
      ('doc_length', per_slot_shape),
  ]
  self.assertCountEqual(
      ['doc_id', 'doc_topic', 'doc_quality', 'doc_features', 'doc_length'],
      slate_docs.keys())
  for field_name, field_shape in expected_shapes:
    np.testing.assert_array_equal(field_shape,
                                  np.shape(slate_docs[field_name]))
def test_response(self):
  """Checks the user's response to slates that hold a single document."""
  self._user = ie_user.InterestEvolutionUser(
      self._config, no_click_mass=self._no_click_mass)
  # Five users, each shown a slate with exactly one one-hot document.
  doc_features = [[[1., 0., 0., 0., 0.]],
                  [[1., 0., 0., 0., 0.]],
                  [[0., 0., 1., 0., 0.]],
                  [[0., 1., 0., 0., 0.]],
                  [[0., 0., 1., 0., 0.]]]
  doc_ids = tf.constant([[1], [2], [3], [4], [5]])
  doc_topics = tf.constant([[0], [1], [2], [3], [4]])
  doc_qualities = tf.constant([[0.], [0.], [0.], [0.], [0.]])
  doc_lengths = tf.constant([[1.], [1.], [1.], [1.], [1.]])
  slate_docs = Value(
      doc_id=ed.Deterministic(loc=doc_ids),
      doc_topic=ed.Deterministic(loc=doc_topics),
      doc_quality=ed.Deterministic(loc=doc_qualities),
      doc_features=ed.Deterministic(loc=tf.constant(doc_features)),
      doc_length=ed.Deterministic(loc=doc_lengths),
  )
  interests = [[.1, .1, .1, .1, .1],
               [.2, .2, .2, .2, .2],
               [0., 0., 0., 0., 0.],
               [.4, .4, .4, .4, .4],
               [.5, .5, .5, .5, .5]]
  user_state = Value(
      state=ed.Deterministic(loc=interests)).prefixed_with('interest')
  response_value = self._user.next_response(user_state, slate_docs)
  response = self.evaluate(response_value.as_dict)
  # Every user is expected to click the only document presented (index 0).
  # NOTE(review): presumably this relies on the fixture's no-click mass
  # ruling out the no-click option -- confirm against setUp.
  expected_response = {
      'choice': [0, 0, 0, 0, 0],
      'consumed_time': [0.5, 0.5, 0.5, 0.5, 0.5],
  }
  self.assertAllClose(expected_response, response)
def chained_rv_test_network(self):
  """Builds a chained two-field random variable plus matching observations.

  The simulated sequence is
    z[0] = (0., 1.)
    z[t][0] = Normal(loc=z[t-1][0], scale=1)
    z[t][1] = Normal(loc=z[t][0] + 1., scale=2)

  Returns:
    A tuple `(z, o, obs_0, obs_1)`: the simulated variable, the data variable
    holding the observations, and the two raw observation tensors.
  """
  obs_0 = tf.constant([0., 1., 2., 3.])
  obs_1 = tf.constant([1., 2., 3., 4.])
  observed_sequence = data.SlicedValue(value=Value(a0=obs_0, a1=obs_1))
  o = data.data_variable(
      name="o",
      spec=ValueSpec(a0=FieldSpec(), a1=FieldSpec()),
      data_sequence=observed_sequence)

  def start():
    # z[0] is the deterministic pair (0., 1.).
    return Value(a0=ed.Deterministic(loc=0.), a1=ed.Deterministic(loc=1.))

  def step(prev):
    # The second field is centred one above the freshly drawn first field.
    first = ed.Normal(loc=prev.get("a0"), scale=1.)
    second = ed.Normal(loc=first + 1., scale=2.)
    return Value(a0=first, a1=second)

  z = Variable(name="z", spec=ValueSpec(a0=FieldSpec(), a1=FieldSpec()))
  z.initial_value = variable.value(start)
  z.value = variable.value(step, (z.previous, ))
  return z, o, obs_0, obs_1
def test_next_metrics(self):
  """Checks reward and cumulative reward after two identical metric steps."""
  utilities = [0.5, 0.6, 0.4]
  user_state = Value(utilities=ed.Deterministic(loc=utilities))
  # Apply the same utilities twice, starting from the initial metrics.
  metrics_after_one = self._metrics.next_metrics(
      self._metrics.initial_metrics(), None, user_state, None, None)
  metrics_after_two = self._metrics.next_metrics(
      metrics_after_one, None, user_state, None, None)
  # The reward is the latest utility; the cumulative reward is twice that.
  expected_metrics = {
      'reward': [0.5, 0.6, 0.4],
      'cumulative_reward': [1.0, 1.2, 0.8],
  }
  self.assertAllClose(expected_metrics,
                      self.evaluate(metrics_after_two.as_dict))
def next_state(self, previous_state, user_response, slate_docs):
  """Updates provider click counts with the documents the users consumed.

  Args:
    previous_state: state `Value`; its "provider_click_count" field is read.
    user_response: response `Value`; its "choice" field is read.
    slate_docs: the slates the choices refer to; the chosen entries'
      "provider_id" field is read.

  Returns:
    A `Value` whose `provider_click_count` is the discounted previous count
    plus the number of clicks each provider received in this step.
  """
  chosen = selectors.get_chosen(slate_docs, user_response.get("choice"))
  clicked_provider = tf.one_hot(
      chosen.get("provider_id"), self._num_providers, dtype=tf.float32)
  # Sum over the leading axis to obtain the new clicks per provider.
  new_clicks = tf.reduce_sum(clicked_provider, axis=0)
  discounted_clicks = (
      self._discount * previous_state.get("provider_click_count"))
  return Value(
      provider_click_count=ed.Deterministic(
          loc=discounted_clicks + new_clicks))
def test_states(self):
  """Checks the user's initial state and one state transition."""
  init_state = self._user.initial_state()
  initial_intent = self.evaluate(init_state.as_dict['intent'])
  initial_satisfaction = self.evaluate(init_state.as_dict['satisfaction'])
  self.assertAllEqual(initial_intent.shape,
                      (self._num_users, self._num_topics))
  self.assertAllClose(initial_satisfaction, 5. * np.ones(self._num_users))
  # Present a slate of all-zero documents with a dummy (None) response.
  zero_features = tf.zeros(
      (self._num_users, self._slate_size, self._num_topics))
  slate_docs = Value(features=ed.Deterministic(loc=zero_features))
  transitioned = self._user.next_state(init_state, None, slate_docs)
  next_state = self.evaluate(transitioned.as_dict)
  # The intent is expected to stay put while the satisfaction moves.
  self.assertAllClose(next_state['intent'], initial_intent)
  self.assertAllClose(next_state['satisfaction'],
                      [4.794248, 5.006677, 4.724719, 4.8083])
def test_states(self):
  """Checks the initial interest shape and one interest update step."""
  self._user = ie_user.InterestEvolutionUser(
      self._config, no_click_mass=self._no_click_mass)
  init_state = self._user.initial_state()
  user_interests = init_state.get('interest').get('state')
  expected_shape = [self._config['num_users'], self._config['num_topics']]
  np.testing.assert_array_equal(expected_shape, np.shape(user_interests))

  # Create a dummy response and check user interest shift.
  doc_features = [[[1., 0., 0., 0., 0.]],
                  [[1., 0., 0., 0., 0.]],
                  [[0., 0., 1., 0., 0.]],
                  [[0., 1., 0., 0., 0.]],
                  [[0., 0., 1., 0., 0.]]]
  doc_ids = tf.constant([[1], [2], [3], [4], [5]])
  doc_topics = tf.constant([[0], [1], [2], [3], [4]])
  doc_qualities = tf.constant([[0.], [0.], [0.], [0.], [0.]])
  doc_lengths = tf.constant([[1.], [1.], [1.], [1.], [1.]])
  slate_docs = Value(
      doc_id=ed.Deterministic(loc=doc_ids),
      doc_topic=ed.Deterministic(loc=doc_topics),
      doc_quality=ed.Deterministic(loc=doc_qualities),
      doc_features=ed.Deterministic(loc=tf.constant(doc_features)),
      doc_length=ed.Deterministic(loc=doc_lengths),
  )
  # Every user picks slate position 0 and consumes it for one time unit.
  mock_response = Value(
      choice=ed.Deterministic(
          loc=tf.zeros((self._num_users, ), dtype=tf.int32)),
      consumed_time=ed.Deterministic(loc=tf.ones((self._num_users, ))))
  next_state = self._user.next_state(init_state, mock_response, slate_docs)

  # Recompute the expected update by hand from the chosen documents.
  chosen = selector_lib.get_chosen(slate_docs, mock_response.get('choice'))
  chosen_quality = chosen.get('doc_quality')
  chosen_features = chosen.get('doc_features')
  direction = chosen_quality * (chosen_features - user_interests)
  shifted_interests = user_interests + self._interest_step_size * direction
  # Squash the shifted interests with a scaled, centred sigmoid.
  max_affinity = self._user._max_user_affinity
  expected_user_interests = (
      4.0 * max_affinity *
      (tf.sigmoid(shifted_interests / max_affinity) - 0.5))
  self.assertAllClose(expected_user_interests,
                      next_state.get('interest').get('state'))
def initial_state(self):
  """Returns the initial user state.

  Returns:
    A `Value` with all-zero `utilities` (one entry per user) and
    `user_interests` taken from the interest model's initial state.
  """
  zero_utilities = tf.zeros((self._num_users, ))
  starting_interests = self._interest_model.initial_state().get('state')
  return Value(
      utilities=ed.Deterministic(loc=zero_utilities),
      user_interests=starting_interests)
def initial_state(self):
  """Returns the initial state, in which no provider has any clicks yet."""
  zero_clicks = tf.zeros((self._num_providers, ), dtype=tf.float32)
  return Value(provider_click_count=ed.Deterministic(loc=zero_clicks))
def dis_test_mock_model_slate_docs(self):
  """Checks slate construction and model inputs with a mocked scoring model.

  NOTE(review): the `dis_` prefix presumably keeps the test runner from
  collecting this method -- confirm whether it is meant to stay disabled.
  """
  # Recommender state: five history slots for each of the three users.
  history_doc_ids = tf.constant(
      [[1, 2, 3, 4, 4], [3, 0, 0, 0, 0], [2, 2, 2, 3, 0]], dtype=tf.int32)
  doc_history = Value(state=ed.Deterministic(loc=history_doc_ids))
  ctime_history = Value(state=ed.Deterministic(
      loc=tf.constant(tf.ones((3, 5)), dtype=tf.float32)))
  recommender_state = doc_history.prefixed_with('doc_history').union(
      ctime_history.prefixed_with('ctime_history'))

  # There are 5 docs in the corpus, construct available_docs pool.
  doc_features = [[1., 0., 0., 0., 0.],
                  [1., 0., 0., 0., 0.],
                  [0., 0., 1., 0., 0.],
                  [0., 1., 0., 0., 0.],
                  [0., 0., 1., 0., 0.]]
  available_docs = Value(
      doc_id=ed.Deterministic(
          loc=tf.constant([1, 2, 3, 4, 5], dtype=tf.int32)),
      doc_topic=ed.Deterministic(
          loc=tf.constant([1, 2, 3, 4, 5], dtype=tf.int32)),
      doc_quality=ed.Deterministic(loc=tf.constant([.1, .2, .3, .4, .5])),
      doc_features=ed.Deterministic(loc=tf.constant(doc_features)),
      doc_length=ed.Deterministic(
          loc=tf.constant([1, 1, 1, 1, 1], dtype=tf.int32)))

  # Scores the mock model returns: one single-element row per (user, doc)
  # pair, stacked user after user.
  user_1_scores = [[.1], [.2], [.3], [.4], [.5]]
  user_2_scores = [[.5], [.4], [.3], [.2], [.1]]
  user_3_scores = [[0.], [0.], [3.], [4.], [5.]]
  scores_to_return = tf.concat(
      [user_1_scores, user_2_scores, user_3_scores], axis=0)
  self._recommender = cf_recommender.CollabFilteringRecommender(
      self._config,
      model_ctor=functools.partial(MockModel, model_output=scores_to_return))
  slate_docs = self.evaluate(
      self._recommender.slate_docs(recommender_state, {},
                                   available_docs).as_dict)

  # Verify returned docs.
  expected_slate = {
      'doc_id': [[5, 4], [1, 2], [5, 4]],
      'doc_topic': [[5, 4], [1, 2], [5, 4]],
      'doc_quality': [[.5, .4], [.1, .2], [.5, .4]],
      'doc_features': [
          [[0., 0., 1., 0., 0.], [0., 1., 0., 0., 0.]],
          [[1., 0., 0., 0., 0.], [1., 0., 0., 0., 0.]],
          [[0., 0., 1., 0., 0.], [0., 1., 0., 0., 0.]],
      ],
      'doc_length': [[1, 1], [1, 1], [1, 1]],
  }
  self.assertAllClose(expected_slate, slate_docs)

  # Verify correct inputs were passed to the model.
  model = self._recommender._model
  actual_input_docs = self.evaluate(model._cached_input_docs)
  self.assertAllEqual(doc_history.get('state').value, actual_input_docs)
  actual_input_ctimes = self.evaluate(model._cached_input_ctimes)
  self.assertAllClose(ctime_history.get('state').value, actual_input_ctimes)
def test_smoke(self):
  """Smoke-tests log_probability on two small single-field models."""
  observed = tf.constant([0., 1., 2., 3.])
  o = data.data_variable(
      name="o",
      spec=ValueSpec(a=FieldSpec()),
      data_sequence=data.SlicedValue(value=Value(a=observed)))

  # This computes the log-probability of a sequence
  #   x[0] = 0.
  #   x[t] = Normal(loc=x[t-1], scale=1)
  # against the observation
  #   o = [0., 1., 2., 3.]
  def x_start():
    return Value(a=ed.Deterministic(loc=0.))

  def x_step(x_prev):
    return Value(a=ed.Normal(loc=x_prev.get("a"), scale=1.))

  x = Variable(name="x", spec=ValueSpec(a=FieldSpec()))
  x.initial_value = variable.value(x_start)
  x.value = variable.value(x_step, (x.previous, ))
  # Entry i is the expected log-probability for num_steps=i.
  expected_for_x = (0., -1.4189385, -2.837877, -4.2568154)
  for steps, wanted in enumerate(expected_for_x):
    self.assertAllClose(
        wanted,
        log_probability.log_probability(
            variables=[x], observation=[o], num_steps=steps))

  # This is an example of a field value that is not a random variable (y.t).
  # This computes the log-probability of a sequence
  #   y[t] = Normal(loc=t, scale=1)
  # against the observation
  #   o = [0., 1., 2., 3.]
  y = data.data_variable(
      name="y",
      spec=ValueSpec(a=FieldSpec()),
      data_sequence=data.TimeSteps(),
      output_fn=lambda t: Value(a=ed.Normal(loc=float(t), scale=1.)))
  expected_for_y = (-0.918939, -1.837877, -2.756815, -3.675754)
  for steps, wanted in enumerate(expected_for_y):
    self.assertAllClose(
        wanted,
        log_probability.log_probability(
            variables=[y], observation=[o], num_steps=steps))
def observation(self, user_state):
  """Exposes the user's interests unchanged as the observation.

  Args:
    user_state: user state `Value`; its 'user_interests' field is read.

  Returns:
    A `Value` with the single field `user_interests`.
  """
  # user_interests are fully observable.
  interests = user_state.get('user_interests')
  return Value(user_interests=ed.Deterministic(loc=interests))
def initial_state(self):
  """Returns the initial user state.

  Returns:
    A `Value` with `satisfaction` set to the configured initial level for
    every user, `intent` taken from the intent model's initial state, and an
    all-zero `max_slate_utility`.
  """
  starting_satisfaction = (
      self._initial_satisfication * tf.ones(self._num_users))
  starting_intent = self._intent_model.initial_state().get('state')
  return Value(
      satisfaction=ed.Deterministic(loc=starting_satisfaction),
      intent=starting_intent,
      # Stored as a plain tensor, not wrapped in a random variable.
      max_slate_utility=tf.zeros(self._num_users))
def initial_metrics(self):
  """Returns the initial metrics: zero reward and zero cumulative reward.

  Returns:
    A `Value` with the fields `reward` and `cumulative_reward`, each an
    all-zero vector with one entry per user.
  """
  zero_reward = ed.Deterministic(loc=tf.zeros([self._num_users]))
  zero_cumulative = ed.Deterministic(loc=tf.zeros([self._num_users]))
  return Value(reward=zero_reward, cumulative_reward=zero_cumulative)