예제 #1
0
파일: test_layers.py 프로젝트: zoovu/rasa
def test_multi_label_dot_product_loss__loss_sigmoid_is_ln2_when_all_similarities_zero(
):
    """Check that `_loss_sigmoid` equals ln(2) when every similarity is zero.

    The positive/negative labels are drawn at random; the assertion expects
    the same loss regardless of which labels come up (presumably because
    sigmoid(0) == 0.5 gives a cross-entropy of ln(2) for either label value;
    confirm against the implementation of `_loss_sigmoid`).
    """
    batch_size = 2
    num_candidates = 2
    sim_pos = tf.zeros([batch_size, 1, 1], dtype=tf.float32)
    sim_candidates_il = tf.zeros([batch_size, 1, num_candidates],
                                 dtype=tf.float32)
    # Random 0/1 labels: each entry is 1.0 when the uniform sample is < 0.5.
    pos_neg_labels = tf.cast(
        tf.random.uniform([batch_size, num_candidates]) < 0.5, tf.float32)

    layer = MultiLabelDotProductLoss(num_candidates,
                                     scale_loss=False,
                                     similarity_type=INNER)
    loss = layer._loss_sigmoid(sim_pos, sim_candidates_il, pos_neg_labels)
    # `np.math` was an accidental alias of the stdlib `math` module; it was
    # deprecated in NumPy 1.25 and removed in NumPy 2.0, so use `np.log`.
    assert abs(loss.numpy() - np.log(2.0)) < 1e-6
예제 #2
0
파일: test_layers.py 프로젝트: zoovu/rasa
def test_multi_label_dot_product_loss__compute_accuracy_with_and_without_mask(
    sim_pos: np.ndarray,
    sim_candidates_il: np.ndarray,
    pos_neg_labels: np.ndarray,
    mask: Optional[np.ndarray],
    expected_accuracy: float,
):
    """Check `_accuracy` against an expected value, with or without a mask.

    The arguments are presumably supplied by a parametrize decorator that is
    not visible in this excerpt. `mask` may be `None` and is passed through
    unchanged.
    """
    loss_layer = MultiLabelDotProductLoss(num_candidates=3)

    # Both similarity arrays get a new axis at position 1 and are cast to
    # float32 before being handed to the layer.
    sim_pos_expanded = np.expand_dims(sim_pos, 1).astype(np.float32)
    sim_candidates_expanded = np.expand_dims(sim_candidates_il,
                                             1).astype(np.float32)
    labels_as_float = pos_neg_labels.astype(np.float32)

    accuracy_tensor = loss_layer._accuracy(
        sim_pos_expanded,
        sim_candidates_expanded,
        labels_as_float,
        mask,
    )
    accuracy = accuracy_tensor.numpy()

    assert np.isclose([accuracy], [expected_accuracy])
예제 #3
0
파일: test_layers.py 프로젝트: zoovu/rasa
def test_multi_label_dot_product_loss__construct_label_padding_mask(
    label_ids: List[List[int]],
    num_candidates: int,
    expected_pos_label_mask: List[List[int]],
):
    """Check the mask built by `_construct_mask_for_label_padding`.

    The leading columns of the mask (one per column of `label_ids`) must equal
    `expected_pos_label_mask`; the remaining columns, which the test treats as
    belonging to the sampled candidates, must all be ones.
    """
    # The label ids are passed with an extra trailing axis.
    ids_with_trailing_axis = np.expand_dims(label_ids, -1)
    mask_tensor = MultiLabelDotProductLoss._construct_mask_for_label_padding(
        ids_with_trailing_axis, num_candidates)
    actual_label_mask = mask_tensor.numpy()

    pos_label_columns = np.array(label_ids).shape[1]

    # Part 1: columns corresponding to the guaranteed positive label ids.
    positive_part = actual_label_mask[:, :pos_label_columns]
    expected_positive_part = np.array(expected_pos_label_mask).astype(
        np.float32)
    assert np.all(positive_part == expected_positive_part)

    # Part 2: columns corresponding to the sampled candidates.
    candidate_part = actual_label_mask[:, pos_label_columns:]
    expected_candidate_part = np.ones((len(label_ids), num_candidates),
                                      dtype=np.float32)
    assert np.all(candidate_part == expected_candidate_part)
예제 #4
0
파일: test_layers.py 프로젝트: zoovu/rasa
def test_multi_label_dot_product_loss_call_shapes():
    """Check that calling the layer returns a rank-0 loss and accuracy."""
    layer = MultiLabelDotProductLoss(1)

    # Two examples, one input embedding each, three features per embedding.
    inputs_embed = tf.constant(
        [
            [[0, 1, 2]],
            [[-2, 0, 2]],
        ],
        dtype=tf.float32,
    )
    # Two label embeddings per example, with the matching ids below.
    labels_embed = tf.constant(
        [
            [[0, 0, 1], [1, 0, 0]],
            [[0, 1, 0], [1, 0, 0]],
        ],
        dtype=tf.float32,
    )
    labels_ids = tf.constant(
        [
            [[2], [0]],
            [[1], [0]],
        ],
        dtype=tf.float32,
    )
    # The complete label set: three embeddings with ids 0, 1 and 2.
    every_label_embed = tf.constant(
        [
            [1, 0, 0],
            [0, 1, 0],
            [0, 0, 1],
        ],
        dtype=tf.float32,
    )
    every_label_id = tf.constant([[0], [1], [2]], dtype=tf.float32)

    # The final positional argument is the mask; none is used here.
    loss, accuracy = layer(
        inputs_embed,
        labels_embed,
        labels_ids,
        every_label_embed,
        every_label_id,
        None,
    )

    # An empty shape vector means the value is a scalar.
    for scalar_output in (loss, accuracy):
        assert len(tf.shape(scalar_output)) == 0
예제 #5
0
파일: test_layers.py 프로젝트: zoovu/rasa
def test_multi_label_dot_product_loss__sample_candidates_with_variable_number_of_labels(
    monkeypatch: MonkeyPatch, ):
    """Check `_sample_candidates` when examples have different label counts.

    Examples with fewer labels are padded with a placeholder embedding and the
    id -1. `layers_utils.random_indices` is patched so the sampled candidates
    are deterministic.
    """
    num_candidates = 2
    num_features = 4
    batch_size = 3
    layer = MultiLabelDotProductLoss(num_candidates)

    # Draw seven random integer-valued vectors: three inputs, four labels.
    random_vectors = np.round(
        np.random.uniform(-100, 100, size=[7, num_features])).tolist()
    i0, i1, i2, l0, l1, l2, l3 = random_vectors
    # Placeholder embedding used to pad examples with fewer labels.
    lp = [-1] * num_features

    # One input per example.
    batch_inputs_embed = tf.constant([[i0], [i1], [i2]], dtype=tf.float32)
    # Up to three labels per example; missing slots hold the placeholder `lp`.
    padded_label_rows = [[l0, l1, l3], [l2, lp, lp], [l3, l0, lp]]
    batch_labels_embed = tf.constant(padded_label_rows, dtype=tf.float32)
    # Ids matching the embeddings above; padded slots carry the id -1.
    batch_labels_ids = tf.constant(
        [
            [[0], [1], [3]],
            [[2], [-1], [-1]],
            [[3], [0], [-1]],
        ],
        dtype=tf.float32,
    )
    # The full label inventory and its ids.
    all_labels_embed = tf.constant([l0, l1, l2, l3], dtype=tf.float32)
    all_labels_ids = tf.constant([[0], [1], [2], [3]], dtype=tf.float32)

    # `layer._sample_candidates` draws random candidate indices internally.
    # Replace that source of randomness with a fixed sequence.
    mock_indices = [0, 2, 0, 1, 3, 1]

    def mock_random_indices(*args, **kwargs) -> tf.Tensor:
        fixed = tf.constant(mock_indices)
        return tf.reshape(fixed, [batch_size, num_candidates])

    monkeypatch.setattr(layers_utils, "random_indices", mock_random_indices)

    # Run the function under test.
    sampled = layer._sample_candidates(
        batch_inputs_embed,
        batch_labels_embed,
        batch_labels_ids,
        all_labels_embed,
        all_labels_ids,
    )
    (
        pos_inputs_embed,
        pos_labels_embed,
        candidate_labels_embed,
        pos_neg_labels,
    ) = sampled

    # The inputs come back unchanged apart from one additional dimension.
    expected_inputs = tf.expand_dims(batch_inputs_embed, axis=-2).numpy()
    assert np.all(pos_inputs_embed.numpy() == expected_inputs)

    # Every example's labels (padding included) appear in `pos_labels_embed`.
    expected_pos_labels = np.array(
        [[[l0, l1, l3]], [[l2, lp, lp]], [[l3, l0, lp]]])
    assert np.all(pos_labels_embed.numpy() == expected_pos_labels)

    # Candidate embeddings follow `mock_indices`: an index of 2 selects
    # `all_labels_embed[2]`, i.e. `l2`.
    expected_candidates = np.array([[[l0, l2]], [[l0, l1]], [[l3, l1]]])
    assert np.all(candidate_labels_embed.numpy() == expected_candidates)

    # `pos_neg_labels` holds a 1 wherever the sampled candidate of example `i`
    # is among the possible labels of example `i`.
    expected_pos_neg = np.array([
        # l0 is a positive label in `batch_labels_embed[0]`, l2 is not.
        [1, 0],
        # Neither l0 nor l1 is a positive label in `batch_labels_embed[1]`.
        [0, 0],
        # l3 is a positive label in `batch_labels_embed[2]`, l1 is not.
        [1, 0],
    ])
    assert np.all(pos_neg_labels.numpy() == expected_pos_neg)