def test_encode_context_feature_values_with_prefix(self, mock_module_cls):
    def mock_fn(input, signature=None):
        self.assertIn(signature, {"encode_context", "encode_response", None})
        if signature == "encode_context":
            self.assertIsInstance(input, dict)
            self.assertEqual(2, len(input))
            for input_t in input.values():
                self.assertEqual(input_t.dtype, tf.string)

    mock_module_cls.return_value = mock_fn
    with mock.patch("encoder_client._batch_session_run") as f:
        client = encoder_client.EncoderClient(
            "test_uri",
            use_extra_context=True,
            max_extra_contexts=3,
            use_extra_context_prefixes=True,
            cache_size=0,
        )
        mock_module_cls.assert_called_with("test_uri")
        encodings = client.encode_contexts(
            ["hello", "hi", "yo"],
            extra_contexts=[
                ["a", "b", "c", "d d"],
                ["A", "B", "C", "D", "E", "F"],
                [],
            ],
        )
        f.assert_called_once()
        self.assertEqual(
            ["0: d d 1: c 2: b", "0: F 1: E 2: D", ""],
            list(f.call_args[0][1][client._fed_extra_contexts]),
        )
        self.assertEqual(f.return_value, encodings)
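# A minimal sketch of the extra-context formatting the test above asserts:
# extra contexts are reversed (most recent first), truncated to
# max_extra_contexts, and each turn is prefixed with its index.
# format_extra_contexts is a hypothetical helper for illustration, not the
# library's API.
def format_extra_contexts(extra_contexts, max_extra_contexts=3):
    recent_first = list(reversed(extra_contexts))[:max_extra_contexts]
    return " ".join("{}: {}".format(i, turn)
                    for i, turn in enumerate(recent_first))

assert format_extra_contexts(["a", "b", "c", "d d"]) == "0: d d 1: c 2: b"
assert format_extra_contexts([]) == ""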
def test_encode_context(self, mock_module_cls):
    def mock_fn(input, signature=None):
        self.assertIn(signature, {"encode_context", "encode_response", None})
        if signature == "encode_context":
            self.assertIsInstance(input, dict)
            self.assertEqual(2, len(input))
            for input_t in input.values():
                self.assertEqual(input_t.dtype, tf.string)
            return tf.ones([tf.shape(input_t)[0], 3])

    mock_module_cls.return_value = mock_fn
    client = encoder_client.EncoderClient("test_uri", use_extra_context=True)
    mock_module_cls.assert_called_with("test_uri")
    encodings = client.encode_contexts(
        ["hello", "hi", "yo"],
        extra_contexts=[
            ["a", "b", "c", "d"],
            ["A", "B", "C", "D", "E", "F"],
            [],
        ],
    )
    np.testing.assert_allclose([[1., 1., 1.]] * 3, encodings)
def test_encode_sentences_batching_caching(self, mock_module_cls):
    def mock_fn(input, signature=None):
        self.assertIn(signature, {"encode_context", "encode_response", None})
        self.assertIsInstance(input, tf.Tensor)
        self.assertEqual(input.dtype, tf.string)
        if signature is None:
            return tf.random_normal([tf.shape(input)[0], 3])

    mock_module_cls.return_value = mock_fn
    client = encoder_client.EncoderClient(
        # force batching by setting batch size to 3
        "test_uri", internal_batch_size=3, cache_size=100,
    )
    mock_module_cls.assert_called_with("test_uri")
    encodings = client.encode_sentences(
        ["a", "a", "b", "c", "d", "e", "f", "g"])

    # Test de-duplication:
    np.testing.assert_allclose(encodings[0], encodings[1])

    encodings_2 = client.encode_sentences(["a", "b", "c", "z"])

    # Test caching
    np.testing.assert_allclose(encodings[0], encodings_2[0])
    np.testing.assert_allclose(encodings[2], encodings_2[1])
    np.testing.assert_allclose(encodings[3], encodings_2[2])
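# A rough sketch, not the library's implementation, of how the behaviour the
# test above checks (de-duplication, caching across calls, internal batching)
# can be layered over a plain batched encode function:
def encode_with_cache(sentences, encode_batch, cache, batch_size=3):
    # Preserve first-seen order while de-duplicating; skip cached sentences.
    unique_missing = [s for s in dict.fromkeys(sentences) if s not in cache]
    for start in range(0, len(unique_missing), batch_size):
        batch = unique_missing[start:start + batch_size]
        for sentence, vector in zip(batch, encode_batch(batch)):
            cache[sentence] = vector
    # Every requested sentence is now cached; repeated inputs share a vector.
    return [cache[s] for s in sentences]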
def __init__(self, train_x, train_y, valid_x, valid_y):
    self.train_x = train_x
    self.train_y = train_y
    self.valid_x = valid_x
    self.valid_y = valid_y
    self.n_labels = self.valid_y.shape[1]
    self.module = encoder_client.EncoderClient(
        "http://models.poly-ai.com/convert/v1/model.tar.gz")
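# A hedged sketch of one way the stored encoder might be used by this
# wrapper: encode the raw sentences once, then fit a linear classifier on the
# encodings. The sklearn classifier and the fit_and_score method are
# assumptions for illustration, not part of the original class; labels are
# assumed one-hot, consistent with n_labels = valid_y.shape[1].
from sklearn.linear_model import LogisticRegression
import numpy as np

def fit_and_score(self):
    train_encodings = self.module.encode_sentences(self.train_x)
    valid_encodings = self.module.encode_sentences(self.valid_x)
    classifier = LogisticRegression(max_iter=1000)
    # Convert one-hot rows to class ids before fitting.
    classifier.fit(train_encodings, np.argmax(self.train_y, axis=1))
    return classifier.score(valid_encodings, np.argmax(self.valid_y, axis=1))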
def test_encode_response(self, mock_module_cls):
    def mock_fn(input, signature=None):
        self.assertIn(signature, {"encode_context", "encode_response", None})
        self.assertIsInstance(input, tf.Tensor)
        self.assertEqual(input.dtype, tf.string)
        if signature == "encode_response":
            return tf.ones([tf.shape(input)[0], 3])

    mock_module_cls.return_value = mock_fn
    client = encoder_client.EncoderClient("test_uri")
    mock_module_cls.assert_called_with("test_uri")
    encodings = client.encode_responses(["hello"])
    np.testing.assert_allclose([[1, 1, 1]], encodings)
        # Find the position of 0 in the argsort, as index 0 is the correct
        # response.
        ranks.append((-scores).argsort().argmin())
        if (i + 1) % 100 == 0:
            glog.info(f"Scored {i + 1} / {len(examples)} examples.")
    ranks = numpy.asarray(ranks)
    for k in [1, 10, 50]:
        recall_at_k = (ranks < k).mean()
        glog.info(f"Recall@{k} = {recall_at_k:.3f}")
    mrr = (1 / (1.0 + ranks)).mean()
    glog.info(f"MRR = {mrr:.3f}")


if __name__ == "__main__":
    flags = _parse_args()
    client = encoder_client.EncoderClient(
        flags.encoder,
        use_extra_context=True,
        use_extra_context_prefixes=True,
        max_extra_contexts=10,
    )
    reader = test_reader.TestReader(
        examples_json=flags.examples_json,
        labels_tsv=flags.labels_tsv,
    )
    _evaluate(client, reader.examples)
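# A toy worked example of the ranking logic above (illustration only):
# index 0 holds the correct response, so its rank is its position in the
# descending-score ordering.
import numpy

scores = numpy.array([0.4, 0.9, 0.1])  # scores[0] belongs to the true response
order = (-scores).argsort()            # [1, 0, 2]: candidates best-first
rank = order.argmin()                  # true response (index 0) is at position 1
assert rank == 1
# For this single example: Recall@1 = 0.0, Recall@10 = 1.0,
# and MRR = 1 / (1.0 + rank) = 0.5.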
print(sequence_encoding)
print(sequence_encoding.shape)

tokens = encoding['tokens']
print(tokens)

tokenized_encoding = sess.run(
    encoding_tensor2, feed_dict={text_placeholder: text})
# print(encoding.shape)
print(tokenized_encoding)

if sess is not None:
    sess.close()

# --------------------------- experiment convert encode client
# encode_sentences
import encoder_client
import pdb

# Internally it implements caching, deduplication, and batching, to help
# speed up encoding. Note that because it does batching internally, you can
# pass very large lists of sentences to encode without going out of memory.
client = encoder_client.EncoderClient(
    "http://models.poly-ai.com/convert/v1/model.tar.gz")

# Find good responses to the following context.
context_encodings = client.encode_contexts(["What's your name?"])

# Rank the following responses as candidates.
candidate_responses = ["No, thanks.", "I'm Matt", "Hey", "I have a dog"]
response_encodings = client.encode_responses(candidate_responses)

# Compute scores using the dot product.
scores = response_encodings.dot(context_encodings.T).flatten()

# Output the top-scoring response.
top_idx = scores.argmax()
pdb.set_trace()
print('Best response: {}, score: {:.3f}'.format(
    candidate_responses[top_idx], scores[top_idx]))
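# A small follow-on sketch (reusing the variables above): the same dot-product
# scores can rank the whole candidate list, not just pick the single best.
ranking = sorted(
    zip(candidate_responses, scores), key=lambda pair: pair[1], reverse=True)
for response, score in ranking:
    print('{:.3f}  {}'.format(score, response))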