def to_method_object(self):
    """Build the `BaselineMethod` instance that this enum member stands for.

    The known members are tried one after another, in a fixed order, and the
    first one that compares equal to `self` decides which method object is
    constructed. Nothing is constructed for members that do not match.

    Returns:
        A newly constructed method object from `keyword_based` or
        `vector_based`.

    Raises:
        ValueError: if `self` is equal to none of the known members.
    """
    use_url = "https://tfhub.dev/google/universal-sentence-encoder/2"
    use_large_url = (
        "https://tfhub.dev/google/universal-sentence-encoder-large/3")
    elmo_url = "https://tfhub.dev/google/elmo/1"

    def _similarity(url):
        # Similarity method on top of a TF Hub encoder loaded from `url`.
        return vector_based.VectorSimilarityMethod(
            encoder=vector_based.TfHubEncoder(url))

    def _mapping(url):
        # Mapping method on top of a TF Hub encoder loaded from `url`.
        return vector_based.VectorMappingMethod(
            encoder=vector_based.TfHubEncoder(url))

    # (member name, zero-argument factory). The factories are lambdas so that
    # only the matching entry ever touches `keyword_based` / `vector_based`.
    factories = (
        ("TF_IDF", lambda: keyword_based.TfIdfMethod()),
        ("BM25", lambda: keyword_based.BM25Method()),
        ("USE_SIM", lambda: _similarity(use_url)),
        ("USE_LARGE_SIM", lambda: _similarity(use_large_url)),
        ("ELMO_SIM", lambda: _similarity(elmo_url)),
        ("USE_MAP", lambda: _mapping(use_url)),
        ("USE_LARGE_MAP", lambda: _mapping(use_large_url)),
        ("ELMO_MAP", lambda: _mapping(elmo_url)),
    )

    for member_name, build in factories:
        # Members are looked up on `self` one at a time, in table order.
        if self == getattr(self, member_name):
            return build()

    raise ValueError("Unknown method {}".format(self))
def test_train_then_rank(self):
    """Train a `VectorMappingMethod` on a mocked encoder, then rank with it.

    Checks that training on 104 examples calls the encoder in batches of 100
    and then 4 (contexts before responses within each batch), and that
    ranking three contexts against three responses yields three ids, each in
    the range [0, 3).
    """
    encoder = mock.Mock()
    # Each encode() call returns one random 3-dimensional row per input text.
    encoder.encode.side_effect = (
        lambda texts: np.random.normal(size=(len(texts), 3)))

    method = vector_based.VectorMappingMethod(
        encoder, learning_rates=[1], regularizers=[0])

    # Use 104 elements, so that the encoding must be batched.
    method.train(["context"] * 104, ["response"] * 104)

    expected_calls = [
        mock.call([text] * batch_size)
        for batch_size in (100, 4)
        for text in ("context", "response")
    ]
    encoder.encode.assert_has_calls(expected_calls)

    ranked = method.rank_responses(["x", "y", "z"], ["a", "b", "c"])
    self.assertEqual((3, ), ranked.shape)
    for response_index in ranked:
        self.assertGreaterEqual(response_index, 0)
        self.assertLess(response_index, 3)
def to_method_object(self):
    """Build the `BaselineMethod` instance that this enum member stands for.

    The known members are tried one after another, in a fixed order, and the
    first one that compares equal to `self` decides which method object is
    constructed. Nothing is constructed for members that do not match.

    Returns:
        A newly constructed method object from `keyword_based` or
        `vector_based`.

    Raises:
        ValueError: if `self` is equal to none of the known members.
    """
    use_url = "https://tfhub.dev/google/universal-sentence-encoder/2"
    use_large_url = (
        "https://tfhub.dev/google/universal-sentence-encoder-large/3")
    elmo_url = "https://tfhub.dev/google/elmo/1"
    bert_small_url = "https://tfhub.dev/google/bert_uncased_L-12_H-768_A-12/1"
    bert_large_url = (
        "https://tfhub.dev/google/bert_uncased_L-24_H-1024_A-16/1")
    use_qa_url = (
        "https://tfhub.dev/google/universal-sentence-encoder-multilingual-qa/1")

    def _hub(url, **encoder_kwargs):
        # TF Hub encoder for `url`; extra keyword arguments are forwarded.
        return vector_based.TfHubEncoder(url, **encoder_kwargs)

    def _bert(url):
        # BERT encoder for `url`.
        return vector_based.BERTEncoder(url)

    def _similarity(encoder):
        return vector_based.VectorSimilarityMethod(encoder=encoder)

    def _mapping(encoder):
        return vector_based.VectorMappingMethod(encoder=encoder)

    # (member name, zero-argument factory). The factories are lambdas so that
    # only the matching entry ever touches `keyword_based` / `vector_based`.
    factories = (
        ("TF_IDF", lambda: keyword_based.TfIdfMethod()),
        ("BM25", lambda: keyword_based.BM25Method()),
        ("USE_SIM", lambda: _similarity(_hub(use_url))),
        ("USE_LARGE_SIM", lambda: _similarity(_hub(use_large_url))),
        ("ELMO_SIM", lambda: _similarity(_hub(elmo_url))),
        ("USE_MAP", lambda: _mapping(_hub(use_url))),
        ("USE_LARGE_MAP", lambda: _mapping(_hub(use_large_url))),
        ("ELMO_MAP", lambda: _mapping(_hub(elmo_url))),
        ("BERT_SMALL_SIM", lambda: _similarity(_bert(bert_small_url))),
        ("BERT_SMALL_MAP", lambda: _mapping(_bert(bert_small_url))),
        ("BERT_LARGE_SIM", lambda: _similarity(_bert(bert_large_url))),
        ("BERT_LARGE_MAP", lambda: _mapping(_bert(bert_large_url))),
        ("USE_QA", lambda: _similarity(_hub(use_qa_url, is_dual=True))),
    )

    for member_name, build in factories:
        # Members are looked up on `self` one at a time, in table order.
        if self == getattr(self, member_name):
            return build()

    raise ValueError("Unknown method {}".format(self))