Esempio n. 1
0
    def test_task(self):
        """Checks the retrieval task's loss and its top-5 accuracy metric.

        The task is called once on two query/candidate embedding pairs; the
        returned loss and the metric results (read back with `.numpy()`) are
        compared against hand-computed values.
        """

        query_embeddings = tf.constant([[1, 2, 3], [2, 3, 4]],
                                       dtype=tf.float32)
        candidate_embeddings = tf.constant([[1, 1, 1], [1, 1, 0]],
                                           dtype=tf.float32)

        # Twenty all-zero rows, batched by 16, form the metric's candidates.
        zero_rows = np.array([[0, 0, 0]] * 20, dtype=np.float32)
        candidate_batches = tf.data.Dataset.from_tensor_slices(
            zero_rows).batch(16)

        top_5_accuracy = tf.keras.metrics.TopKCategoricalAccuracy(
            k=5, name="factorized_categorical_accuracy_at_5")
        factorized_top_k = metrics.FactorizedTopK(
            candidates=candidate_batches, metrics=[top_5_accuracy])
        task = retrieval.Retrieval(metrics=factorized_top_k)

        # All_pair_scores: [[6, 3], [9, 5]].
        # Normalized logits: [[3, 0], [4, 0]].
        first_row_loss = -np.log(_sigmoid(3.0))
        second_row_loss = -np.log(1 - _sigmoid(4.0))
        expected_loss = first_row_loss + second_row_loss

        loss = task(query_embeddings=query_embeddings,
                    candidate_embeddings=candidate_embeddings)

        observed_metrics = {}
        for metric in task.metrics:
            observed_metrics[metric.name] = metric.result().numpy()

        self.assertIsNotNone(loss)
        self.assertAllClose(expected_loss, loss)
        self.assertAllClose(
            {"factorized_categorical_accuracy_at_5": 1.0}, observed_metrics)
Esempio n. 2
0
    def test_task_graph(self):
        """Checks the retrieval task's metric inside a graph and v1 session.

        The task is built under a fresh `tf.Graph`, its variables and those of
        its metrics are initialized through the session, the loss is run once,
        and the metric results are then evaluated and compared.
        """

        # The graph is made default before the session is constructed.
        with tf.Graph().as_default(), tf.compat.v1.Session() as sess:
            query_embeddings = tf.constant([[1, 2, 3], [2, 3, 4]],
                                           dtype=tf.float32)
            candidate_embeddings = tf.constant([[1, 1, 1], [1, 1, 0]],
                                               dtype=tf.float32)

            # Twenty all-zero rows, batched by 16, form the candidates.
            zero_rows = np.array([[0, 0, 0]] * 20, dtype=np.float32)
            candidate_batches = tf.data.Dataset.from_tensor_slices(
                zero_rows).batch(16)

            top_5_accuracy = tf.keras.metrics.TopKCategoricalAccuracy(
                k=5, name="factorized_categorical_accuracy_at_5")
            factorized_top_k = metrics.FactorizedTopK(
                candidates=candidate_batches, metrics=[top_5_accuracy])
            task = retrieval.Retrieval(metrics=factorized_top_k)

            loss = task(query_embeddings=query_embeddings,
                        candidate_embeddings=candidate_embeddings)

            # Variables must be initialized explicitly before running ops.
            task_initializers = [v.initializer for v in task.variables]
            sess.run(task_initializers)
            for metric in task.metrics:
                metric_initializers = [
                    v.initializer for v in metric.variables
                ]
                sess.run(metric_initializers)
            sess.run(loss)

            observed_metrics = {}
            for metric in task.metrics:
                observed_metrics[metric.name] = sess.run(metric.result())

            self.assertAllClose(
                {"factorized_categorical_accuracy_at_5": 1.0},
                observed_metrics)
Esempio n. 3
0
            def __init__(self, candidate_dataset):
                """Creates the two Dense(16) layers and the retrieval task.

                Args:
                    candidate_dataset: Dataset that is mapped through
                        `self.candidate_model` to provide the candidates for
                        the task's `FactorizedTopK` metric.
                """
                super().__init__()

                self.query_model = tf.keras.layers.Dense(16)
                self.candidate_model = tf.keras.layers.Dense(16)

                # Candidates for the metric come from the candidate layer.
                mapped_candidates = candidate_dataset.map(self.candidate_model)
                top_k_metric = metrics.FactorizedTopK(
                    candidates=mapped_candidates,
                    ks=[5],
                )
                self.task = tasks.Retrieval(metrics=top_k_metric)
Esempio n. 4
0
            def __init__(self, candidate_dataset):
                """Creates the layers plus a retrieval task and a ranking task.

                Args:
                    candidate_dataset: Dataset that is mapped through
                        `self.candidate_model` to provide the candidates for
                        the retrieval task's `FactorizedTopK` metric.
                """
                super().__init__()

                self.query_model = tf.keras.layers.Dense(16)
                self.candidate_model = tf.keras.layers.Dense(16)
                self.ctr_model = tf.keras.layers.Dense(1, activation="sigmoid")

                # Candidates for the metric come from the candidate layer.
                mapped_candidates = candidate_dataset.map(self.candidate_model)
                top_k_metric = metrics.FactorizedTopK(
                    candidates=mapped_candidates, ks=[5])
                self.retrieval_task = tasks.Retrieval(metrics=top_k_metric)

                ctr_auc = tf.keras.metrics.AUC(name="ctr_auc")
                self.ctr_task = tasks.Ranking(metrics=[ctr_auc])
Esempio n. 5
0
            def __init__(self, candidate_dataset):
                """Creates the two Dense(16) layers and the retrieval task.

                Args:
                    candidate_dataset: Dataset that is mapped through
                        `self.candidate_model` to provide the candidates for
                        the task's `FactorizedTopK` metric.
                """
                super().__init__()

                self.query_model = tf.keras.layers.Dense(16)
                self.candidate_model = tf.keras.layers.Dense(16)

                # Candidates for the metric come from the candidate layer.
                mapped_candidates = candidate_dataset.map(self.candidate_model)
                top_5_accuracy = tf.keras.metrics.TopKCategoricalAccuracy(
                    k=5, name="factorized_categorical_accuracy_at_5")
                top_k_metric = metrics.FactorizedTopK(
                    candidates=mapped_candidates,
                    k=5,
                    metrics=[top_5_accuracy],
                )
                self.task = tasks.Retrieval(metrics=top_k_metric)
Esempio n. 6
0
    def test_factorized_top_k(self, top_k_layer):
        """Compares FactorizedTopK results with a NumPy/in_top_k reference.

        Args:
            top_k_layer: Optional callable; when not None it is instantiated
                and its `index` method wraps the batched candidate dataset
                before the dataset is handed to the metric.
        """

        rng = np.random.RandomState(42)

        num_candidates, num_queries, embedding_dim = (100, 10, 4)

        def draw_embeddings(num_rows):
            # Draws a float32 matrix of normal samples from the shared RNG.
            samples = rng.normal(size=(num_rows, embedding_dim))
            return samples.astype(np.float32)

        # Draw order matters: the seeded RNG is consumed sequentially.
        candidate_matrix = draw_embeddings(num_candidates)
        query = draw_embeddings(num_queries)
        true_candidates = draw_embeddings(num_queries)

        # Reference scores: column 0 holds each query's true-candidate score,
        # followed by its scores against every candidate.
        positive_scores = np.sum(query * true_candidates,
                                 axis=1,
                                 keepdims=True)
        candidate_scores = np.matmul(query, candidate_matrix.T)
        all_scores = np.concatenate([positive_scores, candidate_scores],
                                    axis=1)

        ks = [1, 5, 10, 50]

        candidates = tf.data.Dataset.from_tensor_slices(
            candidate_matrix).batch(32)
        if top_k_layer is not None:
            candidates = top_k_layer().index(candidates)

        accuracy_metrics = []
        for x in ks:
            accuracy_metrics.append(
                tf.keras.metrics.TopKCategoricalAccuracy(
                    k=x, name=f"top_{x}_categorical_accuracy"))

        metric = metrics.FactorizedTopK(
            candidates=candidates,
            metrics=accuracy_metrics,
            k=max(ks),
        )
        metric.update_state(query_embeddings=query,
                            true_candidate_embeddings=true_candidates)

        # The true candidate always sits in column 0 of `all_scores`.
        positive_column = np.zeros(num_queries).astype(np.int32)

        for cutoff, metric_value in zip(ks, metric.result()):
            hits = tf.math.in_top_k(targets=positive_column,
                                    predictions=all_scores,
                                    k=cutoff)
            expected_accuracy = hits.numpy().mean()

            self.assertAllClose(metric_value, expected_accuracy)
Esempio n. 7
0
    def test_factorized_top_k(self, top_k_layer, use_candidate_ids):
        """Compares FactorizedTopK results with a NumPy/in_top_k reference.

        Args:
            top_k_layer: Optional callable; when not None it is instantiated
                and its `index_from_dataset` method wraps the batched
                (id, embedding) dataset before it is handed to the metric.
            use_candidate_ids: When truthy, the true candidate ids are passed
                to `update_state`; otherwise `None` is passed instead.
        """

        rng = np.random.RandomState(42)

        num_candidates, num_queries, embedding_dim = (100, 10, 4)

        # String identifiers "0".."99", one per candidate row.
        candidate_ids = np.arange(0, num_candidates).astype(str)

        def draw_embeddings(num_rows):
            # Draws a float32 matrix of normal samples from the shared RNG.
            samples = rng.normal(size=(num_rows, embedding_dim))
            return samples.astype(np.float32)

        # Draw order matters: the seeded RNG is consumed sequentially.
        candidate_matrix = draw_embeddings(num_candidates)
        query = draw_embeddings(num_queries)
        true_candidate_indexes = rng.randint(0,
                                             num_candidates,
                                             size=num_queries)

        true_candidate_embeddings = candidate_matrix[true_candidate_indexes]
        true_candidate_ids = candidate_ids[true_candidate_indexes]

        candidate_scores = np.matmul(query, candidate_matrix.T)

        ks = [1, 5, 10, 50]

        candidates = tf.data.Dataset.from_tensor_slices(
            (candidate_ids, candidate_matrix)).batch(32)
        if top_k_layer is not None:
            candidates = top_k_layer().index_from_dataset(candidates)

        if use_candidate_ids:
            ids_for_update = true_candidate_ids
        else:
            ids_for_update = None

        metric = metrics.FactorizedTopK(candidates=candidates, ks=ks)
        metric.update_state(
            query_embeddings=query,
            true_candidate_embeddings=true_candidate_embeddings,
            true_candidate_ids=ids_for_update)

        for cutoff, metric_value in zip(ks, metric.result()):
            hits = tf.math.in_top_k(targets=true_candidate_indexes,
                                    predictions=candidate_scores,
                                    k=cutoff)
            expected_accuracy = hits.numpy().mean()

            self.assertAllClose(metric_value, expected_accuracy)
Esempio n. 8
0
    def test_id_based_evaluation(self, layer):
        """Checks the metric against a manual count of top-k index hits.

        Args:
            layer: Callable accepting `k` whose result exposes
                `index_from_dataset`; the returned index is used both as the
                metric's candidates and for the manual lookups.
        """

        rng = np.random.default_rng(42)

        k = 100
        num_candidates, num_queries, embedding_dim = (1280, 128, 128)

        # Draw order matters: the seeded generator is consumed sequentially.
        candidates = rng.normal(
            size=(num_candidates, embedding_dim)).astype(np.float32)
        queries = rng.normal(
            size=(num_queries, embedding_dim)).astype(np.float32)
        true_candidate_indices = rng.integers(
            0, num_candidates, size=num_queries).astype(np.int32)

        candidate_batches = tf.data.Dataset.from_tensor_slices(
            candidates).batch(32)
        index = layer(k=k).index_from_dataset(candidate_batches)

        metric = metrics.FactorizedTopK(candidates=index, ks=[k])

        in_top_k = 0

        # Feed one query at a time to the metric and, in parallel, look the
        # query up in the index to count hits by hand.
        for query, true_candidate_idx in zip(queries, true_candidate_indices):
            query_row = query.reshape(1, -1)
            true_row = candidates[true_candidate_idx].reshape(1, -1)

            metric.update_state(query_row, true_row,
                                np.array([true_candidate_idx]))

            _, top_indices = index(query_row)
            retrieved = top_indices.numpy()[0].tolist()

            if true_candidate_idx in retrieved:
                in_top_k += 1

        expected_metric = in_top_k / num_queries

        self.assertEqual(metric.result()[0], expected_metric)