def test(self):
        """Check a right-aligned ConcatSequenceEmbedder with seq_length=3.

        The expected output concatenates three embedding rows per sequence:
        the longer sequence keeps only its last three tokens, and the
        shorter one is preceded by a block of zeros.
        """
        vocab = SimpleVocab(u'a b c d'.split())
        batch = [
            ['a', 'b', 'c', 'd'],
            ['c', 'd'],
        ]

        expected = np.array([
            [3, 4, 1, 5, 6, 0, 7, 8, 1],
            [0, 0, 0, 5, 6, 0, 7, 8, 1],
        ], dtype=np.float32)

        with clean_session():
            embedding_table = tf.constant(
                [[1, 2, 0], [3, 4, 1], [5, 6, 0], [7, 8, 1]],
                dtype=tf.float32)
            embedder = ConcatSequenceEmbedder(
                embedding_table, seq_length=3, align='right')
            actual = embedder.compute(embedder.embeds, batch, vocab)

            # Static shape inference must give (batch, seq_length * embed_dim)
            # with an unknown batch dimension.
            static_shape = embedder.embeds.get_shape().as_list()
            assert static_shape == [None, 3 * 3]

        assert_array_almost_equal(expected, actual, decimal=5)
Beispiel #2
0
    def test_multidim(self):
        """Check reduce_mean on a SequenceBatch whose elements are vectors.

        Each expected row is the sum of the unmasked vectors of that
        sequence divided by the number of unmasked positions.
        """
        def as_f32(data):
            return np.array(data, dtype=np.float32)

        expected = as_f32([
            as_f32([4, 7, 10]) / 2,    # two unmasked steps
            as_f32([8, 14, 20]) / 3,   # three unmasked steps
            as_f32([13, 16, 19]) / 3,  # three unmasked steps
        ])

        with clean_session():
            values = tf.constant([
                [[1., 2., 3.], [3., 5., 7.], [0., 0., 0.]],
                [[2., 4., 6.], [3., 5., 7.], [3., 5., 7.]],
                [[9., 9., 9.], [3., 5., 7.], [1., 2., 3.]],
            ], dtype=tf.float32)
            mask = tf.constant(
                [[1, 1, 0], [1, 1, 1], [1, 1, 1]], dtype=tf.float32)

            seq_batch = SequenceBatch(values, mask)
            mean_op = reduce_mean(seq_batch)
            assert_almost_equal(mean_op.eval(), expected, decimal=5)
    def test(self):
        """Check ConcatSequenceEmbedder with its default arguments.

        The expected output holds four embedding rows per sequence, with
        the shorter sequence followed by zeros.
        """
        vocab = SimpleVocab(u'a b c d'.split())
        batch = [
            ['a', 'b', 'c', 'd'],
            ['c', 'd'],
        ]

        expected = np.array(
            [[1, 2, 0, 3, 4, 1, 5, 6, 0, 7, 8, 1],
             [5, 6, 0, 7, 8, 1, 0, 0, 0, 0, 0, 0]],
            dtype=np.float32)

        with clean_session():
            embedding_table = tf.constant(
                [[1, 2, 0], [3, 4, 1], [5, 6, 0], [7, 8, 1]],
                dtype=tf.float32)
            embedder = ConcatSequenceEmbedder(embedding_table)
            actual = embedder.compute(embedder.embeds, batch, vocab)

        assert_array_almost_equal(expected, actual, decimal=5)
Beispiel #4
0
    def test_batch_mean(self):
        """Check reduce_mean on scalar sequences, including an empty row.

        Covers three cases: a valid mask, a mask with an all-zero row
        (expected to raise at evaluation time), and the same mask with
        ``allow_empty=True`` (expected to give 0 for the empty row).
        """
        expected = np.array([-2. / 3, 1., 21. / 4])

        with clean_session():
            values = tf.constant(
                [[1, -8, 5, 4, 9],
                 [0, 2, 7, 8, 1],
                 [2, -8, 6, 4, 9]],
                dtype=tf.float32)

            valid_mask = tf.constant(
                [[1, 1, 1, 0, 0],
                 [1, 1, 0, 0, 0],
                 [1, 0, 1, 1, 1]],
                dtype=tf.float32)

            # Same as valid_mask except the middle row masks out everything.
            empty_row_mask = tf.constant(
                [[1, 1, 1, 0, 0],
                 [0, 0, 0, 0, 0],
                 [1, 0, 1, 1, 1]],
                dtype=tf.float32)

            mean_op = reduce_mean(SequenceBatch(values, valid_mask))
            assert_almost_equal(mean_op.eval(), expected, decimal=5)

            failing_op = reduce_mean(SequenceBatch(values, empty_row_mask))
            with pytest.raises(InvalidArgumentError):
                failing_op.eval()

            # With allow_empty the empty row is expected to average to zero.
            lenient_op = reduce_mean(
                SequenceBatch(values, empty_row_mask), allow_empty=True)
            expected_lenient = np.array([-2. / 3, 0., 21. / 4])
            assert_almost_equal(lenient_op.eval(), expected_lenient)
Beispiel #5
0
    def test_seq_length(self):
        """Check FeedSequenceBatch with right alignment and seq_length=4.

        The expected feed keeps only the last four tokens of the
        over-long sequence and pads shorter sequences on the left with
        index 0 and mask 0. The placeholders must also expose a static
        shape of [None, 4].
        """
        unk = '<unk>'
        vocab = VocabExample('<unk> a b c'.split(), unk)
        sequences = [
            'a b a b c'.split(),  # longer than seq_length
            'a b'.split(),
            ['b'],
            ['c'],
        ]

        expected_indices = np.array(
            [[2, 1, 2, 3],
             [0, 0, 1, 2],
             [0, 0, 0, 2],
             [0, 0, 0, 3]],
            dtype=np.int32)

        expected_mask = np.array(
            [[1, 1, 1, 1],
             [0, 0, 1, 1],
             [0, 0, 0, 1],
             [0, 0, 0, 1]],
            dtype=np.float32)

        with clean_session():
            feeder = FeedSequenceBatch(align='right', seq_length=4)
            actual_feed = feeder.inputs_to_feed_dict(sequences, vocab)
            expected_feed = {
                feeder.values: expected_indices,
                feeder.mask: expected_mask,
            }
            assert_array_collections_equal(expected_feed, actual_feed)

            # Static shapes must survive passing through another op.
            values_tensor = tf.identity(feeder.values)
            mask_tensor = tf.identity(feeder.mask)
            assert values_tensor.get_shape().as_list() == [None, 4]
            assert mask_tensor.get_shape().as_list() == [None, 4]
Beispiel #6
0
    def test(self):
        """Check reduce_max on a SequenceBatch of vectors.

        The large values in the first sequence sit at a masked position,
        so the expected maximum for that row ignores them. The second
        half evaluates a reduction over a batch whose first row is
        fully masked and expects InvalidArgumentError.
        """
        expected = np.array(
            [[3, 5, 7],
             [3, 5, 7],
             [9, 9, 9]],
            dtype=np.float32)

        with clean_session():
            values = tf.constant([
                [[1., 2., 3.], [3., 5., 7.], [100., 200., 2000.]],
                [[2., 4., 6.], [3., 5., 7.], [3., 5., 7.]],
                [[9., 9., 9.], [3., 5., 7.], [1., 2., 3.]],
            ], dtype=tf.float32)
            valid_mask = tf.constant(
                [[1, 1, 0], [1, 1, 1], [1, 1, 1]], dtype=tf.float32)

            max_op = reduce_max(SequenceBatch(values, valid_mask))
            assert_almost_equal(max_op.eval(), expected, decimal=5)

            empty_row_mask = tf.constant(
                [[0, 0, 0], [1, 1, 1], [1, 1, 1]], dtype=tf.float32)

            # NOTE(review): this failure check goes through reduce_mean, not
            # reduce_max -- possibly a copy-paste slip; confirm which
            # reduction was meant to be exercised here.
            failing_op = reduce_mean(SequenceBatch(values, empty_row_mask))
            with pytest.raises(InvalidArgumentError):
                failing_op.eval()
Beispiel #7
0
def test_expand_dims_for_broadcast():
    """Check expand_dims_for_broadcast on matching and mismatching inputs.

    A length-2 weight vector paired with a (2, 3, 3) tensor must come back
    with shape (2, 1, 1). A length-3 weight vector does not match the
    leading dimension and is expected to fail when evaluated.
    """
    with clean_session():
        plane = [
            [1, 2, 3],
            [4, 5, 6],
            [4, 5, 6],
        ]
        target = tf.constant([plane, plane], dtype=tf.float32)
        weights = tf.constant([1, 2], dtype=tf.float32)

        assert target.get_shape().as_list() == [2, 3, 3]
        assert weights.get_shape().as_list() == [2]

        expanded = expand_dims_for_broadcast(weights, target)
        assert expanded.eval().shape == (2, 1, 1)

        # Three weights against a leading dimension of two.
        mismatched = tf.constant([1, 2, 3], dtype=tf.float32)
        mismatched_expanded = expand_dims_for_broadcast(mismatched, target)

        with pytest.raises(InvalidArgumentError):
            mismatched_expanded.eval()
    def test_lstm(self):
        """Check that padding does not affect LSTMSequenceEmbedder outputs.

        The sequence ['c', 'd'] is embedded twice: once by a model with
        seq_length=4 and once by a model with seq_length=8 that shares the
        first model's weights. Both runs must agree on the final embedding
        and on the hidden states of the two real tokens.
        """
        vocab = SimpleVocab(u'a b c d'.split())
        short_batch = [
            ['a', 'b', 'c', 'd'],
            ['c', 'd'],
            ['a', 'b', 'c', 'd'],
        ]
        long_batch = [
            ['a', 'b', 'c', 'd', 'a', 'b', 'd', 'c'],
            ['b', 'a', 'd'],
            ['c', 'd'],
        ]

        with clean_session():
            embedding_table = tf.constant(
                [[1, 2, 0], [3, 4, 1], [5, 6, 0], [7, 8, 1]],
                dtype=tf.float32)

            small = LSTMSequenceEmbedder(
                embedding_table, seq_length=4, hidden_size=7)
            embeds, hiddens = small.compute(
                [small.embeds, small.hidden_states.values],
                short_batch, vocab)
            assert embeds.shape == (3, 7)
            assert hiddens.shape == (3, 4, 7)

            # Row 1 has two real tokens; the hidden state at the padded
            # positions must repeat the state of the last real token.
            last_real = hiddens[1, 1, :]
            assert_array_almost_equal(last_real, hiddens[1, 2, :], decimal=5)
            assert_array_almost_equal(last_real, hiddens[1, 3, :], decimal=5)

            # Embed ['c', 'd'] again with a different amount of padding.
            large = LSTMSequenceEmbedder(
                embedding_table, seq_length=8, hidden_size=7)
            large.weights = small.weights  # match weights

            embeds_alt, hiddens_alt = large.compute(
                [large.embeds, large.hidden_states.values],
                long_batch, vocab)
            assert embeds_alt.shape == (3, 7)
            assert hiddens_alt.shape == (3, 8, 7)

        # ['c', 'd'] is row 1 of the short batch and row 2 of the long one.
        assert_array_almost_equal(embeds[1, :], embeds_alt[2, :], decimal=5)
        assert_array_almost_equal(
            hiddens[1, :2, :], hiddens_alt[2, :2, :], decimal=5)
Beispiel #9
0
    def test_lstm(self):
        """Check that padding does not affect BidiLSTMSequenceEmbedder outputs.

        The assertions treat the first 7 output columns as the forward
        half and the last 7 as the backward half. For ['c', 'd'] the test
        verifies how each half behaves at padded positions, then re-embeds
        the same sequence with a longer seq_length and shared weights and
        expects identical results.
        """
        vocab = SimpleVocab('a b c d'.split())
        short_batch = [
            ['a', 'b', 'c', 'd'],
            ['c', 'd'],
            ['a', 'b', 'c', 'd'],
        ]
        long_batch = [
            ['a', 'b', 'c', 'd', 'a', 'b', 'd', 'c'],
            ['b', 'a', 'd'],
            ['c', 'd'],
        ]

        with clean_session():
            embedding_table = tf.constant(
                [[1, 2, 0], [3, 4, 1], [5, 6, 0], [7, 8, 1]],
                dtype=tf.float32)

            small = BidiLSTMSequenceEmbedder(
                embedding_table, seq_length=4, hidden_size=7)
            embeds, hiddens = small.compute(
                [small.embeds, small.hidden_states.values],
                short_batch, vocab)
            assert embeds.shape == (3, 14)
            assert hiddens.shape == (3, 4, 14)

            # The embedding of row 1 pairs the forward state at its last
            # real token with the backward state at its first token.
            assert_array_almost_equal(
                embeds[1, :7], hiddens[1, 1, :7], decimal=5)
            assert_array_almost_equal(
                embeds[1, 7:], hiddens[1, 0, 7:], decimal=5)

            # Padded spaces should have the same forward embeddings
            for pad_step in (2, 3):
                assert_array_almost_equal(
                    hiddens[1, 1, :7], hiddens[1, pad_step, :7], decimal=5)

            # Padded spaces should have 0 backward embeddings
            for pad_step in (2, 3):
                assert_array_almost_equal(
                    np.zeros((7,)), hiddens[1, pad_step, 7:], decimal=5)

            # Other spaces should not have 0 embeddings with very high
            # probability
            for half in (slice(None, 7), slice(7, None)):
                for real_step in (0, 1):
                    assert np.linalg.norm(hiddens[1, real_step, half]) > 1e-5

            # Embed ['c', 'd'] again with a different amount of padding.
            large = BidiLSTMSequenceEmbedder(
                embedding_table, seq_length=8, hidden_size=7)
            large.weights = small.weights  # match weights

            embeds_alt, hiddens_alt = large.compute(
                [large.embeds, large.hidden_states.values],
                long_batch, vocab)
            assert embeds_alt.shape == (3, 14)
            assert hiddens_alt.shape == (3, 8, 14)

        # ['c', 'd'] is row 1 of the short batch and row 2 of the long one.
        assert_array_almost_equal(embeds[1, :], embeds_alt[2, :], decimal=5)
        assert_array_almost_equal(
            hiddens[1, :2, :], hiddens_alt[2, :2, :], decimal=5)
Beispiel #10
0
    def test_no_sequences(self):
        """Check that an empty batch yields (0, 0) values and mask arrays."""
        vocab = SimpleVocab('a b c'.split())
        no_sequences = []

        with clean_session():
            feeder = FeedSequenceBatch()
            values_op = tf.identity(feeder.values)
            mask_op = tf.identity(feeder.mask)
            values_out, mask_out = feeder.compute(
                [values_op, mask_op], no_sequences, vocab)
            assert values_out.shape == mask_out.shape
            assert mask_out.shape == (0, 0)
Beispiel #11
0
    def test(self):
        """Check reduce_sum: each row sums only its unmasked entries."""
        expected = np.array([-2, 2, 21])

        with clean_session():
            values = tf.constant(
                [[1, -8, 5, 4, 9],
                 [0, 2, 7, 8, 1],
                 [2, -8, 6, 4, 9]],
                dtype=tf.float32)

            mask = tf.constant(
                [[1, 1, 1, 0, 0],
                 [1, 1, 0, 0, 0],
                 [1, 0, 1, 1, 1]],
                dtype=tf.float32)

            seq_batch = SequenceBatch(values, mask)
            sum_op = reduce_sum(seq_batch)
            assert_almost_equal(sum_op.eval(), expected, decimal=5)
Beispiel #12
0
def test_broadcast():
    """Check that a (3, 2) mask can be broadcast to a (3, 2, 2) tensor.

    The mask is first given a trailing unit dimension with
    expand_dims_for_broadcast, then tiled by broadcast so that each mask
    entry is repeated along the last axis.
    """
    with clean_session():
        values = tf.constant(
            [[[1, 2], [1, 2]],
             [[1, 2], [3, 4]],
             [[5, 6], [7, 8]]],
            dtype=tf.float32)

        mask = tf.constant(
            [[1, 0],
             [1, 1],
             [0, 1]],
            dtype=tf.float32)

        expected = np.array(
            [[[1, 1], [0, 0]],
             [[1, 1], [1, 1]],
             [[0, 0], [1, 1]]],
            dtype=np.float32)

        assert values.get_shape().as_list() == [3, 2, 2]
        assert mask.get_shape().as_list() == [3, 2]

        expanded = expand_dims_for_broadcast(mask, values)
        assert expanded.get_shape().as_list() == [3, 2, 1]

        tiled = broadcast(expanded, values)
        assert tiled.get_shape().as_list() == [3, 2, 2]

        assert_array_equal(tiled.eval(), expected)
Beispiel #13
0
    def test_right_align(self, inputs):
        """Check the feed dict of a right-aligned FeedSequenceBatch.

        Args:
            inputs: an ``(args, kwargs)`` pair forwarded to
                ``inputs_to_feed_dict``; supplied from outside this test.
        """
        expected_indices = np.array(
            [[1, 1, 2, 2, 3],
             [0, 0, 0, 1, 2],
             [0, 0, 0, 0, 2],
             [0, 0, 0, 0, 3]],
            dtype=np.int32)

        expected_mask = np.array(
            [[1, 1, 1, 1, 1],
             [0, 0, 0, 1, 1],
             [0, 0, 0, 0, 1],
             [0, 0, 0, 0, 1]],
            dtype=np.float32)

        with clean_session():
            feeder = FeedSequenceBatch(align='right')
            expected_feed = {
                feeder.values: expected_indices,
                feeder.mask: expected_mask,
            }

            positional, keyword = inputs
            actual_feed = feeder.inputs_to_feed_dict(*positional, **keyword)
            assert_array_collections_equal(expected_feed, actual_feed)
Beispiel #14
0
 def test_empty(self):
     """Check reduce_mean on a zero-row batch: result shape must be (0, 20)."""
     with clean_session():
         values = tf.constant(np.empty((0, 10, 20)))
         mask = tf.constant(np.empty((0, 10)))
         empty_batch = SequenceBatch(values, mask)
         mean_op = reduce_mean(empty_batch)
         result = mean_op.eval()
         assert result.shape == (0, 20)
Beispiel #15
0
def clean_test_session():
    """Yield the session produced by ``clean_session()`` while it is open.

    Written as a generator; presumably registered as a pytest fixture by a
    decorator outside this view -- confirm against the full file.
    """
    with clean_session() as session:
        yield session