def test_merge():
    english = IterableDataset(['Hello world!'])
    french = IterableDataset(['Bonjour le monde!'])
    streams = (english.get_example_stream(), french.get_example_stream())
    merged_stream = Merge(streams, ('english', 'french'))
    assert merged_stream.sources == ('english', 'french')
    assert (next(merged_stream.get_epoch_iterator()) ==
            ('Hello world!', 'Bonjour le monde!'))

def test_training_data_monitoring_updates_algorithm():
    features = [numpy.array(f, dtype=theano.config.floatX)
                for f in [[1, 2], [3, 5], [5, 8]]]
    targets = numpy.array([f.sum() for f in features])
    dataset = IterableDataset(dict(features=features, targets=targets))

    x = tensor.vector('features')
    y = tensor.scalar('targets')
    m = x.mean().copy(name='features_mean')
    t = y.sum().copy(name='targets_sum')

    main_loop = MainLoop(
        model=None, data_stream=dataset.get_example_stream(),
        algorithm=UpdatesAlgorithm(),
        extensions=[TrainingDataMonitoring([m, t], prefix="train1",
                                           after_batch=True)])
    main_loop.extensions[0].main_loop = main_loop
    assert len(main_loop.algorithm.updates) == 0
    main_loop.extensions[0].do('before_training')
    assert len(main_loop.algorithm.updates) > 0

def setup_mainloop(extension):
    """Set up a simple main loop for progress bar tests.

    Create a MainLoop, register the given extension, supply it with a
    DataStream and a minimal model/cost to optimize.

    """
    # Since progressbar2 3.6.0, the `maxval` kwarg has been replaced by
    # `max_value`, which has a default value of 100. If we're still using
    # `maxval` by accident, this test should fail complaining that
    # the progress bar has received a value out of range.
    features = [numpy.array(f, dtype=theano.config.floatX)
                for f in [[1, 2]] * 101]
    dataset = IterableDataset(dict(features=features))

    W = shared_floatx([0, 0], name='W')
    x = tensor.vector('features')
    cost = tensor.sum((x - W) ** 2)
    cost.name = "cost"

    algorithm = GradientDescent(cost=cost, parameters=[W],
                                step_rule=Scale(1e-3))

    main_loop = MainLoop(
        model=None, data_stream=dataset.get_example_stream(),
        algorithm=algorithm,
        extensions=[FinishAfter(after_n_epochs=1), extension])

    return main_loop

def setup_mainloop(extension):
    """Set up a simple main loop for progress bar tests.

    Create a MainLoop, register the given extension, supply it with a
    DataStream and a minimal model/cost to optimize.

    """
    features = [numpy.array(f, dtype=theano.config.floatX)
                for f in [[1, 2], [3, 4], [5, 6]]]
    dataset = IterableDataset(dict(features=features))

    W = shared_floatx([0, 0], name='W')
    x = tensor.vector('features')
    cost = tensor.sum((x - W) ** 2)
    cost.name = "cost"

    algorithm = GradientDescent(cost=cost, parameters=[W],
                                step_rule=Scale(1e-3))

    main_loop = MainLoop(
        model=None, data_stream=dataset.get_example_stream(),
        algorithm=algorithm,
        extensions=[FinishAfter(after_n_epochs=1), extension])

    return main_loop

def test_shared_variable_modifier_two_params():
    weights = numpy.array([-1, 1], dtype=theano.config.floatX)
    features = [numpy.array(f, dtype=theano.config.floatX)
                for f in [[1, 2], [3, 4], [5, 6]]]
    targets = [(weights * f).sum() for f in features]
    n_batches = 3
    dataset = IterableDataset(dict(features=features, targets=targets))

    x = tensor.vector('features')
    y = tensor.scalar('targets')
    W = shared_floatx([0, 0], name='W')
    cost = ((x * W).sum() - y) ** 2
    cost.name = 'cost'

    step_rule = Scale(0.001)
    sgd = GradientDescent(cost=cost, params=[W], step_rule=step_rule)
    modifier = SharedVariableModifier(
        step_rule.learning_rate,
        lambda _, val: numpy.cast[theano.config.floatX](val * 0.2))

    main_loop = MainLoop(
        model=None, data_stream=dataset.get_example_stream(),
        algorithm=sgd,
        extensions=[FinishAfter(after_n_epochs=1), modifier])

    main_loop.run()

    new_value = step_rule.learning_rate.get_value()
    assert_allclose(new_value, 0.001 * 0.2 ** n_batches, atol=1e-5)

def get_data_stream(iterable):
    dataset = IterableDataset({'numbers': iterable})
    data_stream = Mapping(dataset.get_example_stream(), _data_sqrt,
                          add_sources=('roots',))
    data_stream = Mapping(data_stream, _array_tuple)
    return Batch(data_stream, ConstantScheme(20))

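# `_data_sqrt` and `_array_tuple` are referenced by the stream above but are
# not part of this listing; the definitions below are only a sketch of what
# they are assumed to do. `_data_sqrt` returns the square root of each
# example's number, which Mapping appends as the new 'roots' source, and
# `_array_tuple` casts every source of an example to a floatX numpy array.
def _data_sqrt(data):
    # `data` is an example tuple holding a single 'numbers' value.
    return (numpy.sqrt(data[0]),)


def _array_tuple(data):
    # Cast each source in the example tuple to a floatX numpy array.
    return tuple(numpy.asarray(d, dtype=theano.config.floatX) for d in data)
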
def setup_mainloop(extension):
    """Set up a simple main loop for progress bar tests.

    Create a MainLoop, register the given extension, supply it with a
    DataStream and a minimal model/cost to optimize.

    """
    features = [numpy.array(f, dtype=theano.config.floatX)
                for f in [[1, 2], [3, 4], [5, 6]]]
    dataset = IterableDataset(dict(features=features))

    W = shared_floatx([0, 0], name='W')
    x = tensor.vector('features')
    cost = tensor.sum((x - W) ** 2)
    cost.name = "cost"

    algorithm = GradientDescent(cost=cost, params=[W],
                                step_rule=Scale(1e-3))

    main_loop = MainLoop(
        model=None, data_stream=dataset.get_example_stream(),
        algorithm=algorithm,
        extensions=[FinishAfter(after_n_epochs=1), extension])

    return main_loop

def test_shared_variable_modifier():
    weights = numpy.array([-1, 1], dtype=theano.config.floatX)
    features = [numpy.array(f, dtype=theano.config.floatX)
                for f in [[1, 2], [3, 4], [5, 6]]]
    targets = [(weights * f).sum() for f in features]
    n_batches = 3
    dataset = IterableDataset(dict(features=features, targets=targets))

    x = tensor.vector("features")
    y = tensor.scalar("targets")
    W = shared_floatx([0, 0], name="W")
    cost = ((x * W).sum() - y) ** 2
    cost.name = "cost"

    step_rule = Scale(0.001)
    sgd = GradientDescent(cost=cost, parameters=[W], step_rule=step_rule)

    main_loop = MainLoop(
        model=None, data_stream=dataset.get_example_stream(),
        algorithm=sgd,
        extensions=[
            FinishAfter(after_n_epochs=1),
            SharedVariableModifier(
                step_rule.learning_rate,
                lambda n: numpy.cast[theano.config.floatX](10.0 / n))])

    main_loop.run()

    assert_allclose(step_rule.learning_rate.get_value(),
                    numpy.cast[theano.config.floatX](10.0 / n_batches))

def test_shared_variable_modifier_two_parameters():
    weights = numpy.array([-1, 1], dtype=theano.config.floatX)
    features = [numpy.array(f, dtype=theano.config.floatX)
                for f in [[1, 2], [3, 4], [5, 6]]]
    targets = [(weights * f).sum() for f in features]
    n_batches = 3
    dataset = IterableDataset(dict(features=features, targets=targets))

    x = tensor.vector('features')
    y = tensor.scalar('targets')
    W = shared_floatx([0, 0], name='W')
    cost = ((x * W).sum() - y) ** 2
    cost.name = 'cost'

    step_rule = Scale(0.001)
    sgd = GradientDescent(cost=cost, parameters=[W], step_rule=step_rule)
    modifier = SharedVariableModifier(
        step_rule.learning_rate,
        lambda _, val: numpy.cast[theano.config.floatX](val * 0.2))

    main_loop = MainLoop(
        model=None, data_stream=dataset.get_example_stream(),
        algorithm=sgd,
        extensions=[FinishAfter(after_n_epochs=1), modifier])

    main_loop.run()

    new_value = step_rule.learning_rate.get_value()
    assert_allclose(new_value, 0.001 * 0.2 ** n_batches, atol=1e-5)

def test_training_data_monitoring():
    weights = numpy.array([-1, 1], dtype=theano.config.floatX)
    features = [numpy.array(f, dtype=theano.config.floatX)
                for f in [[1, 2], [3, 4], [5, 6]]]
    targets = [(weights * f).sum() for f in features]
    n_batches = 3
    dataset = IterableDataset(dict(features=features, targets=targets))

    x = tensor.vector('features')
    y = tensor.scalar('targets')
    W = shared_floatx([0, 0], name='W')
    V = shared_floatx(7, name='V')
    W_sum = named_copy(W.sum(), 'W_sum')
    cost = ((x * W).sum() - y) ** 2
    cost.name = 'cost'

    class TrueCostExtension(TrainingExtension):

        def before_batch(self, data):
            self.main_loop.log.current_row['true_cost'] = (
                ((W.get_value() * data["features"]).sum() -
                 data["targets"]) ** 2)

    main_loop = MainLoop(
        model=None, data_stream=dataset.get_example_stream(),
        algorithm=GradientDescent(cost=cost, parameters=[W],
                                  step_rule=Scale(0.001)),
        extensions=[
            FinishAfter(after_n_epochs=1),
            TrainingDataMonitoring([W_sum, cost, V], prefix="train1",
                                   after_batch=True),
            TrainingDataMonitoring([aggregation.mean(W_sum), cost],
                                   prefix="train2", after_epoch=True),
            TrueCostExtension()])

    main_loop.run()

    # Check monitoring of a shared variable
    assert_allclose(main_loop.log.current_row['train1_V'], 7.0)

    for i in range(n_batches):
        # The ground truth is written to the log before the batch is
        # processed, whereas the extension writes after the batch is
        # processed. This is why the iteration numbers differ here.
        assert_allclose(main_loop.log[i]['true_cost'],
                        main_loop.log[i + 1]['train1_cost'])
    assert_allclose(
        main_loop.log[n_batches]['train2_cost'],
        sum([main_loop.log[i]['true_cost']
             for i in range(n_batches)]) / n_batches)
    assert_allclose(
        main_loop.log[n_batches]['train2_W_sum'],
        sum([main_loop.log[i]['train1_W_sum']
             for i in range(1, n_batches + 1)]) / n_batches)

def get_data_stream(iterable): """Returns a 'fuel.Batch' datastream of [x~input~numbers, y~targets~roots], with each iteration returning a batch of 20 training examples """ numbers = numpy.asarray(iterable, dtype=floatX) dataset = IterableDataset( {'numbers': numbers, 'roots': numpy.sqrt(numbers)}) return Batch(dataset.get_example_stream(), ConstantScheme(20))
def get_data_stream(iterable): """Returns a 'fuel.Batch' datastream of [x~input~numbers, y~targets~roots], with each iteration returning a batch of 20 training examples """ dataset = IterableDataset({"numbers": iterable}) data_stream = Mapping(dataset.get_example_stream(), _data_sqrt, add_sources=("roots",)) data_stream = Mapping(data_stream, _array_tuple) return Batch(data_stream, ConstantScheme(20))
def get_data_stream(iterable): """Returns a 'fuel.Batch' datastream of [x~input~numbers, y~targets~roots], with each iteration returning a batch of 20 training examples """ dataset = IterableDataset({'numbers': iterable}) data_stream = Mapping(dataset.get_example_stream(), _data_sqrt, add_sources=('roots', )) data_stream = Mapping(data_stream, _array_tuple) return Batch(data_stream, ConstantScheme(20))
def get_data_stream(iterable): """Returns a 'fuel.Batch' datastream of [x~input~numbers, y~targets~roots], with each iteration returning a batch of 20 training examples """ numbers = numpy.asarray(iterable, dtype=floatX) dataset = IterableDataset({ 'numbers': numbers, 'roots': numpy.sqrt(numbers) }) return Batch(dataset.get_example_stream(), ConstantScheme(20))
def test_training_data_monitoring():
    weights = numpy.array([-1, 1], dtype=theano.config.floatX)
    features = [numpy.array(f, dtype=theano.config.floatX)
                for f in [[1, 2], [3, 4], [5, 6]]]
    targets = [(weights * f).sum() for f in features]
    n_batches = 3
    dataset = IterableDataset(dict(features=features, targets=targets))

    x = tensor.vector('features')
    y = tensor.scalar('targets')
    W = shared_floatx([0, 0], name='W')
    V = shared_floatx(7, name='V')
    W_sum = named_copy(W.sum(), 'W_sum')
    cost = ((x * W).sum() - y) ** 2
    cost.name = 'cost'

    class TrueCostExtension(TrainingExtension):

        def before_batch(self, data):
            self.main_loop.log.current_row['true_cost'] = (
                ((W.get_value() * data["features"]).sum() -
                 data["targets"]) ** 2)

    main_loop = MainLoop(
        model=None, data_stream=dataset.get_example_stream(),
        algorithm=GradientDescent(cost=cost, params=[W],
                                  step_rule=Scale(0.001)),
        extensions=[
            FinishAfter(after_n_epochs=1),
            TrainingDataMonitoring([W_sum, cost, V], prefix="train1",
                                   after_batch=True),
            TrainingDataMonitoring([aggregation.mean(W_sum), cost],
                                   prefix="train2", after_epoch=True),
            TrueCostExtension()])

    main_loop.run()

    # Check monitoring of a shared variable
    assert_allclose(main_loop.log.current_row['train1_V'], 7.0)

    for i in range(n_batches):
        # The ground truth is written to the log before the batch is
        # processed, whereas the extension writes after the batch is
        # processed. This is why the iteration numbers differ here.
        assert_allclose(main_loop.log[i]['true_cost'],
                        main_loop.log[i + 1]['train1_cost'])
    assert_allclose(
        main_loop.log[n_batches]['train2_cost'],
        sum([main_loop.log[i]['true_cost']
             for i in range(n_batches)]) / n_batches)
    assert_allclose(
        main_loop.log[n_batches]['train2_W_sum'],
        sum([main_loop.log[i]['train1_W_sum']
             for i in range(1, n_batches + 1)]) / n_batches)

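# The snippets above are excerpts and omit their module-level imports. The
# block below is a sketch of the imports they appear to rely on; exact module
# paths may differ across Blocks and Fuel versions, and `floatX` is assumed
# here to be an alias for `theano.config.floatX`.
import numpy
import theano
from numpy.testing import assert_allclose
from theano import tensor

from fuel.datasets import IterableDataset
from fuel.schemes import ConstantScheme
from fuel.transformers import Batch, Mapping, Merge

from blocks.algorithms import GradientDescent, Scale, UpdatesAlgorithm
from blocks.extensions import FinishAfter, TrainingExtension
from blocks.extensions.monitoring import TrainingDataMonitoring
from blocks.extensions.training import SharedVariableModifier
from blocks.main_loop import MainLoop
from blocks.monitoring import aggregation
from blocks.utils import named_copy, shared_floatx

floatX = theano.config.floatX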