Example #1
    def _get_unmasked_q_values(
        self, q_network, state: rlt.FeatureData, slate: rlt.DocList
    ) -> torch.Tensor:
        """Gets the Q-values of every slate item from the given network
        (called with both the online and the target network)."""
        batch_size, slate_size, _ = slate.float_features.shape
        # TODO: Probably should create a new model type
        # Tile each state slate_size times so the network scores every
        # (state, item) pair in one forward pass, then regroup into
        # one row of Q-values per slate.
        return q_network(
            state.repeat_interleave(slate_size, dim=0),
            slate.as_feature_data(),
        ).view(batch_size, slate_size)
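
The pattern worth noting here is the tiling: each state row is repeated once per slate position, so a network that scores a single (state, item) pair can score an entire slate in one batched forward pass. Below is a minimal sketch of the same reshape arithmetic with plain torch tensors; the linear q_net is a hypothetical stand-in, not part of the rlt API.

    import torch

    batch_size, slate_size, state_dim, item_dim = 4, 3, 16, 8
    state = torch.randn(batch_size, state_dim)
    items = torch.randn(batch_size, slate_size, item_dim)

    # (batch, state_dim) -> (batch * slate, state_dim): each state repeated per item
    tiled_state = state.repeat_interleave(slate_size, dim=0)
    # (batch, slate, item_dim) -> (batch * slate, item_dim): items flattened
    flat_items = items.view(batch_size * slate_size, item_dim)

    # Hypothetical pairwise scorer standing in for q_network
    q_net = torch.nn.Linear(state_dim + item_dim, 1)
    flat_scores = q_net(torch.cat([tiled_state, flat_items], dim=1))
    # Regroup the flat scores into one Q-value per slate position
    q_values = flat_scores.view(batch_size, slate_size)
    assert q_values.shape == (batch_size, slate_size)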
Example #2
    def _get_unmask_q_values(
        self,
        q_network,
        state: rlt.FeatureData,
        action: rlt.PreprocessedSlateFeatureVector,
    ) -> torch.Tensor:
        """Gets the Q-values of every slate item from the given network."""
        batch_size, slate_size, _ = action.float_features.shape
        # Same tiling trick as Example #1: one (state, item) row per slate
        # position, regrouped into one row of Q-values per slate.
        return q_network(
            state.repeat_interleave(slate_size, dim=0),
            action.as_preprocessed_feature_vector(),
        ).view(batch_size, slate_size)
Example #3
    def score(preprocessed_obs: rlt.FeatureData) -> torch.Tensor:
        # Tile each state once per action so every (state, action) pair
        # is scored in a single forward pass.
        tiled_state = preprocessed_obs.repeat_interleave(
            repeats=num_actions, axis=0
        )

        # Enumerate every discrete action as a one-hot feature vector.
        actions = rlt.FeatureData(float_features=torch.eye(num_actions))

        q_network.eval()
        scores = q_network(tiled_state.state, actions).view(-1, num_actions)
        assert (
            scores.size(1) == num_actions
        ), f"scores size is {scores.size(1)}, num_actions is {num_actions}"
        q_network.train()
        return F.log_softmax(scores, dim=-1)
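
Example #3 enumerates every discrete action as a one-hot row of torch.eye, scores all of them against the tiled state, and normalizes with log_softmax. A stripped-down sketch of the same enumeration, assuming a hypothetical plain-tensor scorer in place of the rlt q_network:

    import torch
    import torch.nn.functional as F

    batch_size, num_actions, state_dim = 2, 5, 16
    state = torch.randn(batch_size, state_dim)

    # Pair every state with every one-hot action, in matching order
    tiled_state = state.repeat_interleave(num_actions, dim=0)
    actions = torch.eye(num_actions).repeat(batch_size, 1)

    # Hypothetical (state, action) scorer standing in for q_network
    scorer = torch.nn.Linear(state_dim + num_actions, 1)
    scores = scorer(torch.cat([tiled_state, actions], dim=1))
    log_probs = F.log_softmax(scores.view(-1, num_actions), dim=-1)
    assert log_probs.shape == (batch_size, num_actions)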
Example #4
    def score(state: rlt.FeatureData) -> torch.Tensor:
        # Tile each state once per candidate document.
        tiled_state = state.repeat_interleave(repeats=num_candidates, axis=0)
        candidate_docs = state.candidate_docs
        assert candidate_docs is not None
        actions = candidate_docs.as_feature_data()

        q_network.eval()
        scores = q_network(tiled_state, actions).view(-1, num_candidates)
        q_network.train()

        # Weight each candidate's Q-value by its probability of being
        # selected, derived from a softmax over the candidates' values.
        select_prob = F.softmax(candidate_docs.value, dim=1)
        assert select_prob.shape == scores.shape

        return select_prob * scores
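
Example #4 returns an expected score: each candidate's Q-value is weighted by a softmax over the candidates' value features. The weighting step in isolation, with random stand-ins for the network output and the value features:

    import torch
    import torch.nn.functional as F

    batch_size, num_candidates = 4, 6
    scores = torch.randn(batch_size, num_candidates)  # Q(state, doc_i) stand-in
    values = torch.randn(batch_size, num_candidates)  # candidate_docs.value stand-in

    # Softmax over candidates: probability that each doc is selected
    select_prob = F.softmax(values, dim=1)
    expected = select_prob * scores  # per-candidate expected contribution
    assert expected.shape == (batch_size, num_candidates)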