def test_window_stream():
    """Check the pairs that ``Window`` yields over three short sequences.

    Three random integer sequences of lengths 3, 5 and 7 are wrapped in a
    ``DataStream`` and windowed with three different configurations.  For
    each configuration the relation between every ``(source, target)`` pair
    and the total number of pairs produced are asserted.
    """
    sentences = [list(numpy.random.randint(10, size=sentence_length))
                 for sentence_length in [3, 5, 7]]
    stream = DataStream(IterableDataset(sentences))

    # The counter is reset to 0 before every loop.  A ``for`` target is left
    # unbound (or keeps the value from an earlier loop) when the iterator is
    # empty, which would turn the count checks below into a NameError or,
    # worse, a false pass.
    windows = Window(0, 4, 4, True, stream)
    num_windows = 0
    for num_windows, (source, target) in enumerate(
            windows.get_epoch_iterator(), start=1):
        assert source == target
    assert num_windows == 6  # Total of 6 windows

    # Make sure that negative indices work
    windows = Window(-2, 4, 4, False, stream)
    num_windows = 0
    for num_windows, (source, target) in enumerate(
            windows.get_epoch_iterator(), start=1):
        assert source[-2:] == target[:2]
    assert num_windows == 2  # Should get 2 examples

    # Even for overlapping negative indices should work
    windows = Window(-2, 4, 4, True, stream)
    num_windows = 0
    for num_windows, (source, target) in enumerate(
            windows.get_epoch_iterator(), start=1):
        assert source[:2] == target[-2:]
    assert num_windows == 2  # Should get 2 examples
def get_stream(batch_size, source_window=4000, target_window=1000,
               num_examples=5000, youtube_id='XqaJ2Ol5cC4'):
    """Build a batched ``('features', 'targets')`` stream from YouTube audio.

    The example stream of a ``YouTubeAudio`` dataset is passed through
    ``ForceFloatX`` and cut into windows.  The ``'features'`` source is
    mapped through the module-level ``mfcc`` function, the ``'targets'``
    source is flattened, and the two are merged back together and batched.

    Parameters
    ----------
    batch_size : int
        Batch size handed to ``ConstantScheme``.
    source_window : int, optional
        Source window length passed to ``Window``.
    target_window : int, optional
        Target window length passed to ``Window``.
    num_examples : int, optional
        Second argument of ``ConstantScheme``.
    youtube_id : str, optional
        Identifier given to ``YouTubeAudio``.  Defaults to the video that
        was previously hard-coded, so existing callers are unaffected.

    Returns
    -------
    The ``Batch`` stream wrapping the merged features/targets stream.
    """
    # Imported lazily so that importing this module does not require the
    # YouTube audio dataset module.
    from fuel.datasets.youtube_audio import YouTubeAudio

    dataset = YouTubeAudio(youtube_id)
    example_stream = ForceFloatX(dataset.get_example_stream())
    window_stream = Window(0, source_window, target_window,
                           overlapping=False, data_stream=example_stream)

    # NOTE(review): both branches below wrap the *same* window_stream
    # object; confirm that Merge keeps features and targets aligned when
    # the two branches are iterated together.
    feats_stream = Mapping(
        FilterSources(window_stream, sources=('features', )), mfcc)
    targets_stream = Flatten(
        FilterSources(window_stream, sources=('targets', )))

    stream = Merge((feats_stream, targets_stream),
                   sources=('features', 'targets'))

    # TODO: add a random scheme?
    it_scheme = ConstantScheme(batch_size, num_examples)
    return Batch(stream, it_scheme, strictness=1)