def ctc_data():
    """ Returns a provider that can be used with `ctc_model()`.

        The provider holds four sources, each with `number_of_samples` rows:

        - 'TEST_input': uniform floats in [-1, 1) with shape
          (number_of_samples, output_timesteps, 2).
        - 'TEST_transcription': integer labels drawn from
          0 .. vocab_size - 1 (inclusive) with shape
          (number_of_samples, maximum_transcription_length).
        - 'TEST_input_length': a column filled with `output_timesteps`.
        - 'TEST_transcription_length': a column of integers drawn from
          1 .. maximum_transcription_length (inclusive).
    """
    number_of_samples = 11
    # Presumably has to agree with the vocabulary size used by the model
    # this data is paired with -- verify against `ctc_model()`.
    vocab_size = 4
    output_timesteps = 10
    maximum_transcription_length = 4    # Must be <= output_timesteps

    # NOTE: `numpy.random.random_integers` is deprecated; `randint` draws
    # from the half-open interval [low, high), so the upper bounds below
    # are one greater than the largest value that may be produced.
    return BatchProvider(
        sources={
            'TEST_input': VanillaSource(
                numpy.random.uniform(
                    low=-1, high=1,
                    size=(number_of_samples, output_timesteps, 2)
                )
            ),
            'TEST_transcription': VanillaSource(
                numpy.random.randint(
                    0, vocab_size,
                    size=(number_of_samples, maximum_transcription_length)
                )
            ),
            'TEST_input_length': VanillaSource(
                numpy.ones(shape=(number_of_samples, 1)) * output_timesteps
            ),
            'TEST_transcription_length': VanillaSource(
                numpy.random.randint(
                    1, maximum_transcription_length + 1,
                    size=(number_of_samples, 1)
                )
            )
        })
def test_double(self):
    """ Shuffles a stack of exactly two sources with a random permutation.

        After the shuffle, the arrays that were handed to the two sources
        are compared against pre-shuffle copies indexed through the
        permutation, and then the stack is iterated to confirm that every
        entry comes out in permuted order.
    """
    first = numpy.array([1, 2, 3, 4, 5])
    second = numpy.array([6, 7, 8, 9, 0])
    stack = StackSource(VanillaSource(first), VanillaSource(second))

    first_len = len(first)
    total = first_len + len(second)
    assert len(stack) == total
    assert stack.shape() == ()

    permutation = numpy.random.permutation(total)
    # Snapshot the contents before shuffling so there is a reference to
    # compare the post-shuffle arrays against.
    originals = [first.copy(), second.copy()]
    stack.shuffle(permutation)

    # Global indices below `first_len` address the first array.
    local_first = [p for p in permutation if p < first_len]
    for position, value in enumerate(first):
        assert originals[0][local_first[position]] == value

    # The remaining global indices address the second array.
    local_second = [p % first_len for p in permutation if p >= first_len]
    for position, value in enumerate(second):
        assert originals[1][local_second[position]] == value

    seen = 0
    for batch in stack:
        for actual in batch:
            # Split the global index into (which array, offset inside it).
            which, offset = divmod(permutation[seen], first_len)
            assert actual == originals[which][offset]
            seen += 1
    assert seen == total
def simple_data():
    """ Builds a small provider that can be used to train the
        `simple_model()`.

        It carries 100 rows: 'TEST_input' is uniform random with 10 columns
        and 'TEST_output' is uniform random with a single column.
    """
    sources = {}
    sources['TEST_input'] = VanillaSource(
        numpy.random.uniform(size=(100, 10))
    )
    sources['TEST_output'] = VanillaSource(
        numpy.random.uniform(size=(100, 1))
    )
    return BatchProvider(sources=sources)
def embedding_data():
    """ Returns a small provider that can be used to train the
        `embedding_model()`.

        'TEST_input' is a (5, 10) array of integers drawn from 0 .. 99
        (inclusive); 'TEST_output' is a (5, 3) array of uniform random
        floats.
    """
    # NOTE: `numpy.random.random_integers` is deprecated; `randint` excludes
    # its upper bound, so 100 here keeps the inclusive range 0 .. 99.
    return BatchProvider(
        sources={
            'TEST_input': VanillaSource(
                numpy.random.randint(0, 100, size=(5, 10))
            ),
            'TEST_output': VanillaSource(
                numpy.random.uniform(size=(5, 3))
            )
        })
def uber_data():
    """ One provider to rule them all: data for the uber model.

        Holds two samples. 'TEST_input' has shape (2, 32, 32) and
        'TEST_output' has shape (2, 140); both are drawn uniformly from
        [-1, 1).
    """
    sources = {}
    sources['TEST_input'] = VanillaSource(
        numpy.random.uniform(low=-1, high=1, size=(2, 32, 32))
    )
    sources['TEST_output'] = VanillaSource(
        numpy.random.uniform(low=-1, high=1, size=(2, 140))
    )
    return BatchProvider(sources=sources)
def test_single(self):
    """ Shuffles a stack of exactly one source with a random permutation.

        After the shuffle, the array handed to the source is compared
        against a pre-shuffle copy indexed through the permutation, and the
        stack is iterated to confirm it yields that array's entries in
        order.
    """
    values = numpy.array([1, 2, 3, 4, 5])
    stack = StackSource(VanillaSource(values))

    count = len(values)
    assert len(stack) == count
    assert stack.shape() == ()

    permutation = numpy.random.permutation(count)
    # Keep an untouched copy to compare the post-shuffle contents against.
    original = values.copy()
    stack.shuffle(permutation)

    for position, value in enumerate(values):
        assert original[permutation[position]] == value

    seen = 0
    for batch in stack:
        for actual in batch:
            assert actual == values[seen]
            seen += 1
    assert seen == count
def test_single_fixed(self):
    """ Shuffles a stack of exactly one source with a known permutation and
        checks the entries it yields against a hand-computed result.
    """
    values = numpy.array([1, 2, 3, 4, 5])
    permutation = [4, 2, 1, 3, 0]
    wanted = numpy.array([5, 3, 2, 4, 1])

    stack = StackSource(VanillaSource(values))
    stack.shuffle(permutation)

    # Flatten every batch into one sequence of entries.
    collected = []
    for batch in stack:
        collected.extend(batch)

    assert numpy.allclose(numpy.array(collected), wanted)
def test_triple_fixed(self):
    """ Shuffles a stack of exactly three sources with a known permutation
        and checks the entries it yields against a hand-computed result.
    """
    # Three sources of lengths 6, 4 and 3 holding the values 10 .. 22.
    pieces = [
        numpy.arange(10, 16),
        numpy.arange(16, 20),
        numpy.arange(20, 23),
    ]
    permutation = [12, 3, 5, 6, 11, 0, 10, 2, 1, 4, 9, 7, 8]
    wanted = numpy.array(
        [22, 13, 15, 16, 21, 10, 20, 12, 11, 14, 19, 17, 18]
    )

    wrapped = []
    for piece in pieces:
        wrapped.append(VanillaSource(piece))
    stack = StackSource(*wrapped)
    stack.shuffle(permutation)

    # Flatten every batch into one sequence of entries.
    collected = []
    for batch in stack:
        collected.extend(batch)

    assert numpy.allclose(numpy.array(collected), wanted)
def source(request, num_entries, chunk_size):
    """ Builds the data Source under test.

        `request.param` selects the layout: 0 yields a single
        `VanillaSource`, any other value yields a `StackSource` wrapping
        that many `VanillaSource` instances. Each underlying array is a
        random permutation of `num_entries` values plus a constant offset.
        When `chunk_size` is not None it is applied to the result through
        `set_chunk_size()`.
    """
    stack_count = request.param

    if stack_count != 0:
        layers = []
        for index in range(stack_count):
            # Each layer gets its own offset of 100 * index.
            layers.append(
                VanillaSource(
                    numpy.random.permutation(num_entries) + 100 * index
                )
            )
        built = StackSource(*layers)
    else:
        built = VanillaSource(numpy.random.permutation(num_entries) + 10)

    if chunk_size is None:
        return built

    built.set_chunk_size(chunk_size)
    return built