Beispiel #1
0
def ctc_data():
    """ Returns a provider that can be used with `ctc_model()`.

        The provider wraps four randomly generated sources, each holding
        `number_of_samples` rows:

        - `TEST_input`: floats drawn uniformly from [-1, 1) with shape
          (number_of_samples, output_timesteps, 2).
        - `TEST_transcription`: integer labels in [0, vocab_size - 1] with
          shape (number_of_samples, maximum_transcription_length).
        - `TEST_input_length`: a column filled with `output_timesteps`.
        - `TEST_transcription_length`: integers in
          [1, maximum_transcription_length], one per sample.
    """
    number_of_samples = 11
    # NOTE(review): presumably has to agree with the vocabulary size used by
    # `ctc_model()` -- confirm against that definition.
    vocab_size = 4
    output_timesteps = 10
    maximum_transcription_length = 4  # Must be <= output_timesteps

    # `numpy.random.random_integers` is deprecated; `randint` draws from the
    # same range once the (exclusive) upper bound is bumped by one.
    return BatchProvider(
        sources={
            'TEST_input':
            VanillaSource(
                numpy.random.uniform(
                    low=-1,
                    high=1,
                    size=(number_of_samples, output_timesteps, 2))),
            'TEST_transcription':
            VanillaSource(
                numpy.random.randint(
                    0,
                    vocab_size,
                    size=(number_of_samples, maximum_transcription_length))),
            'TEST_input_length':
            VanillaSource(
                numpy.ones(shape=(number_of_samples, 1)) * output_timesteps),
            'TEST_transcription_length':
            VanillaSource(
                numpy.random.randint(
                    1,
                    maximum_transcription_length + 1,
                    size=(number_of_samples, 1)))
        })
Beispiel #2
0
    def test_double(self):
        """ Shuffle a stack of exactly two sources with a random permutation
            and check both the per-source contents and the iteration order.
        """
        first = numpy.array([1, 2, 3, 4, 5])
        second = numpy.array([6, 7, 8, 9, 0])
        data = [first, second]
        stack = StackSource(VanillaSource(first), VanillaSource(second))

        split = len(first)
        total = split + len(second)
        assert len(stack) == total
        assert stack.shape() == ()

        order = numpy.random.permutation(total)

        # Keep pristine copies: the checks below compare the arrays handed to
        # the sources against their pre-shuffle contents.
        originals = [numpy.copy(chunk) for chunk in data]
        stack.shuffle(order)

        # Indices that landed in the first source, in permutation order.
        head = [index for index in order if index < split]
        for position, value in enumerate(first):
            assert originals[0][head[position]] == value

        # Indices that landed in the second source, made source-local.
        tail = [index % split for index in order if index >= split]
        for position, value in enumerate(second):
            assert originals[1][tail[position]] == value

        # Walking the stack must yield entries in permutation order.
        seen = 0
        for batch in stack:
            for actual in batch:
                which, offset = divmod(order[seen], split)
                assert actual == originals[which][offset]
                seen += 1

        assert seen == total
Beispiel #3
0
def simple_data():
    """ Builds a small provider for training the `simple_model()`.

        Both sources hold 100 rows of uniform random floats: 10 columns for
        `TEST_input` and a single column for `TEST_output`.
    """
    inputs = VanillaSource(numpy.random.uniform(size=(100, 10)))
    outputs = VanillaSource(numpy.random.uniform(size=(100, 1)))
    return BatchProvider(
        sources={'TEST_input': inputs, 'TEST_output': outputs})
Beispiel #4
0
def embedding_data():
    """ Returns a small provider that can be used to train the
        `embedding_model()`.

        `TEST_input` holds integers in [0, 99] with shape (5, 10);
        `TEST_output` holds uniform random floats with shape (5, 3).
    """
    # `numpy.random.random_integers` is deprecated; `randint` excludes its
    # upper bound, so 100 keeps the original inclusive range of 0..99.
    return BatchProvider(
        sources={
            'TEST_input':
            VanillaSource(numpy.random.randint(0, 100, size=(5, 10))),
            'TEST_output':
            VanillaSource(numpy.random.uniform(size=(5, 3)))
        })
Beispiel #5
0
def uber_data():
    """ Builds the provider holding data for the uber model.

        Both sources contain two samples of floats drawn uniformly from
        [-1, 1): `TEST_input` has shape (2, 32, 32) and `TEST_output` has
        shape (2, 140).
    """
    inputs = VanillaSource(
        numpy.random.uniform(low=-1, high=1, size=(2, 32, 32)))
    outputs = VanillaSource(
        numpy.random.uniform(low=-1, high=1, size=(2, 140)))
    return BatchProvider(
        sources={'TEST_input': inputs, 'TEST_output': outputs})
Beispiel #6
0
    def test_single(self):
        """ Shuffle a stack of exactly one source with a random permutation
            and check the resulting contents and iteration order.
        """
        data = numpy.array([1, 2, 3, 4, 5])
        count = len(data)
        stack = StackSource(VanillaSource(data))

        assert len(stack) == count
        assert stack.shape() == ()

        order = numpy.random.permutation(count)

        # Snapshot the values first: the check below compares `data` itself
        # against its pre-shuffle contents.
        original = numpy.copy(data)
        stack.shuffle(order)

        for picked, value in zip(order, data):
            assert original[picked] == value

        # Iterating the stack must walk `data` front to back.
        seen = 0
        for batch in stack:
            for actual in batch:
                assert actual == data[seen]
                seen += 1

        assert seen == count
Beispiel #7
0
    def test_single_fixed(self):
        """ Shuffle a stack of exactly one source with a fixed permutation
            and compare the iterated values against the known result.
        """
        stack = StackSource(VanillaSource(numpy.array([1, 2, 3, 4, 5])))
        stack.shuffle([4, 2, 1, 3, 0])

        # Flatten every batch into one list of entries.
        collected = []
        for batch in stack:
            collected.extend(batch)

        expected = numpy.array([5, 3, 2, 4, 1])
        assert numpy.allclose(numpy.array(collected), expected)
Beispiel #8
0
    def test_triple_fixed(self):
        """ Shuffle a stack of exactly three sources with a fixed permutation
            and compare the iterated values against the known result.
        """
        # Consecutive boundaries give the ranges 10..15, 16..19 and 20..22.
        bounds = [10, 16, 20, 23]
        sources = [
            VanillaSource(numpy.arange(start, stop))
            for start, stop in zip(bounds, bounds[1:])
        ]

        stack = StackSource(*sources)
        stack.shuffle([12, 3, 5, 6, 11, 0, 10, 2, 1, 4, 9, 7, 8])

        # Flatten every batch into one list of entries.
        collected = []
        for batch in stack:
            for actual in batch:
                collected.append(actual)

        expected = numpy.array(
            [22, 13, 15, 16, 21, 10, 20, 12, 11, 14, 19, 17, 18])
        assert numpy.allclose(numpy.array(collected), expected)
Beispiel #9
0
def source(request, num_entries, chunk_size):
	""" Builds the data source under test.

		`request.param` selects the variant: 0 yields a single
		`VanillaSource`, any other value yields a `StackSource` made of that
		many `VanillaSource` instances. A chunk size is applied only when
		`chunk_size` is not None.

		NOTE(review): the signature suggests a pytest fixture, but the
		decorator is not visible here -- confirm.
	"""
	def shuffled(offset):
		# A random permutation of 0..num_entries-1, shifted by `offset`.
		return VanillaSource(numpy.random.permutation(num_entries) + offset)

	num_stacks = request.param
	if num_stacks == 0:
		result = shuffled(10)
	else:
		result = StackSource(*[shuffled(100 * i) for i in range(num_stacks)])

	if chunk_size is not None:
		result.set_chunk_size(chunk_size)
	return result