def test_cache():
    mnist = MNIST('test')
    stream = DataStream(
        mnist, iteration_scheme=SequentialScheme(mnist.num_examples, 11))
    cached_stream = CachedDataStream(stream, ConstantScheme(7))
    epoch = cached_stream.get_epoch_iterator()

    # Make sure that cache is filled as expected
    for (features, targets), cache_size in zip(
            epoch, [4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 0, 4]):
        assert len(cached_stream.cache[0]) == cache_size

    # Make sure that the epoch finishes correctly
    for features, targets in cached_stream.get_epoch_iterator():
        pass
    assert len(features) == mnist.num_examples % 7
    assert not cached_stream.cache[0]

    # Ensure that the epoch transition is correct
    cached_stream = CachedDataStream(stream, ConstantScheme(7, times=3))
    for _, epoch in zip(range(2), cached_stream.iterate_epochs()):
        cache_sizes = [4, 8, 1]
        for i, (features, targets) in enumerate(epoch):
            assert len(cached_stream.cache[0]) == cache_sizes[i]
            assert len(features) == 7
            assert numpy.all(mnist.features[i * 7:(i + 1) * 7] == features)
        assert i == 2
def test_cache():
    dataset = ContainerDataset(range(100))
    stream = DataStream(dataset)
    batched_stream = BatchDataStream(stream, ConstantScheme(11))
    cached_stream = CachedDataStream(batched_stream, ConstantScheme(7))
    epoch = cached_stream.get_epoch_iterator()

    # Make sure that cache is filled as expected
    for (features,), cache_size in zip(
            epoch, [4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 0, 4]):
        assert len(cached_stream.cache[0]) == cache_size

    # Make sure that the epoch finishes correctly
    for (features,) in cached_stream.get_epoch_iterator():
        pass
    assert len(features) == 100 % 7
    assert not cached_stream.cache[0]

    # Ensure that the epoch transition is correct
    cached_stream = CachedDataStream(batched_stream,
                                     ConstantScheme(7, times=3))
    for _, epoch in zip(range(2), cached_stream.iterate_epochs()):
        cache_sizes = [4, 8, 1]
        for i, (features,) in enumerate(epoch):
            assert len(cached_stream.cache[0]) == cache_sizes[i]
            assert len(features) == 7
            assert numpy.all(range(100)[i * 7:(i + 1) * 7] == features)
        assert i == 2
class TestDataset(ContainerDataset):
    sources = ('data',)
    default_scheme = ConstantScheme(1)

    def __init__(self):
        self.data = [[1, 2, 3, 4],
                     [5, 6, 7, 8]]

    def open(self):
        epoch_iter = iter(self.data)
        data_iter = iter(next(epoch_iter))
        return (epoch_iter, data_iter)

    def next_epoch(self, state):
        try:
            data_iter = iter(next(state[0]))
            return (state[0], data_iter)
        except StopIteration:
            return self.open()

    def get_data(self, state, request):
        data = []
        for i in range(request):
            data.append(next(state[1]))
        return (data,)
def get_data_stream(iterable):
    dataset = ContainerDataset({'numbers': iterable})
    data_stream = DataStreamMapping(dataset.get_default_stream(),
                                    _data_sqrt, add_sources=('roots',))
    data_stream = DataStreamMapping(data_stream, _array_tuple)
    return BatchDataStream(data_stream, ConstantScheme(20))
def get_data_stream(iterable):
    dataset = ContainerDataset({'numbers': iterable})
    data_stream = DataStreamMapping(dataset.get_default_stream(),
                                    lambda data: (math.sqrt(data[0]),),
                                    add_sources=('roots',))
    data_stream = DataStreamMapping(
        data_stream,
        lambda data: tuple(numpy.asarray(d, dtype=floatX) for d in data))
    return BatchDataStream(data_stream, ConstantScheme(20))
def test_batch_data_stream():
    stream = ContainerDataset([1, 2, 3, 4, 5]).get_default_stream()
    batches = list(BatchDataStream(stream, ConstantScheme(2))
                   .get_epoch_iterator())
    expected = [(numpy.array([1, 2]),),
                (numpy.array([3, 4]),),
                (numpy.array([5]),)]
    assert len(batches) == len(expected)
    for b, e in zip(batches, expected):
        assert (b[0] == e[0]).all()

    # Check the `strict` flag
    def try_strict():
        list(BatchDataStream(stream, ConstantScheme(2), strict=True)
             .get_epoch_iterator())
    assert_raises(ValueError, try_strict)
    stream2 = ContainerDataset([1, 2, 3, 4, 5, 6]).get_default_stream()
    assert len(list(BatchDataStream(stream2, ConstantScheme(2), strict=True)
                    .get_epoch_iterator())) == 3
def test_padding_data_stream():
    # 1-D sequences
    stream = BatchDataStream(
        ContainerDataset([[1], [2, 3], [], [4, 5, 6], [7]])
        .get_default_stream(),
        ConstantScheme(2))
    mask_stream = PaddingDataStream(stream)
    assert mask_stream.sources == ("data", "data_mask")
    it = mask_stream.get_epoch_iterator()
    data, mask = next(it)
    assert (data == numpy.array([[1, 0], [2, 3]])).all()
    assert (mask == numpy.array([[1, 0], [1, 1]])).all()
    data, mask = next(it)
    assert (data == numpy.array([[0, 0, 0], [4, 5, 6]])).all()
    assert (mask == numpy.array([[0, 0, 0], [1, 1, 1]])).all()
    data, mask = next(it)
    assert (data == numpy.array([[7]])).all()
    assert (mask == numpy.array([[1]])).all()

    # 2D sequences
    stream2 = BatchDataStream(
        ContainerDataset([numpy.ones((3, 4)), 2 * numpy.ones((2, 4))])
        .get_default_stream(),
        ConstantScheme(2))
    it = PaddingDataStream(stream2).get_epoch_iterator()
    data, mask = next(it)
    assert data.shape == (2, 3, 4)
    assert (data[0, :, :] == 1).all()
    assert (data[1, :2, :] == 2).all()
    assert (mask == numpy.array([[1, 1, 1], [1, 1, 0]])).all()

    # 2 sources
    stream3 = PaddingDataStream(BatchDataStream(
        ContainerDataset(dict(features=[[1], [2, 3], []],
                              targets=[[4, 5, 6], [7]]))
        .get_default_stream(),
        ConstantScheme(2)))
    assert len(next(stream3.get_epoch_iterator())) == 4
def main(mode, save_path, num_batches, from_dump):
    if mode == "train":
        # Experiment configuration
        dimension = 100
        readout_dimension = len(char2code)

        # Data processing pipeline
        data_stream = DataStreamMapping(
            mapping=lambda data: tuple(array.T for array in data),
            data_stream=PaddingDataStream(
                BatchDataStream(
                    iteration_scheme=ConstantScheme(10),
                    data_stream=DataStreamMapping(
                        mapping=reverse_words,
                        add_sources=("targets",),
                        data_stream=DataStreamFilter(
                            predicate=lambda data: len(data[0]) <= 100,
                            data_stream=OneBillionWord(
                                "training", [99], char2code,
                                level="character",
                                preprocess=str.lower)
                            .get_default_stream())))))

        # Build the model
        chars = tensor.lmatrix("features")
        chars_mask = tensor.matrix("features_mask")
        targets = tensor.lmatrix("targets")
        targets_mask = tensor.matrix("targets_mask")

        encoder = Bidirectional(
            GatedRecurrent(dim=dimension, activation=Tanh()),
            weights_init=Orthogonal())
        encoder.initialize()
        fork = Fork([name for name in encoder.prototype.apply.sequences
                     if name != 'mask'],
                    weights_init=IsotropicGaussian(0.1),
                    biases_init=Constant(0))
        fork.input_dim = dimension
        fork.fork_dims = {name: dimension for name in fork.fork_names}
        fork.initialize()
        lookup = LookupTable(readout_dimension, dimension,
                             weights_init=IsotropicGaussian(0.1))
        lookup.initialize()
        transition = Transition(activation=Tanh(), dim=dimension,
                                attended_dim=2 * dimension,
                                name="transition")
        attention = SequenceContentAttention(
            state_names=transition.apply.states,
            match_dim=dimension, name="attention")
        readout = LinearReadout(
            readout_dim=readout_dimension, source_names=["states"],
            emitter=SoftmaxEmitter(name="emitter"),
            feedbacker=LookupFeedback(readout_dimension, dimension),
            name="readout")
        generator = SequenceGenerator(
            readout=readout, transition=transition, attention=attention,
            weights_init=IsotropicGaussian(0.1), biases_init=Constant(0),
            name="generator")
        generator.push_initialization_config()
        transition.weights_init = Orthogonal()
        generator.initialize()
        bricks = [encoder, fork, lookup, generator]

        # Give an idea of what's going on
        params = Selector(bricks).get_params()
        logger.info("Parameters:\n" +
                    pprint.pformat(
                        [(key, value.get_value().shape)
                         for key, value in params.items()],
                        width=120))

        # Build the cost computation graph
        batch_cost = generator.cost(
            targets, targets_mask,
            attended=encoder.apply(
                **dict_union(fork.apply(lookup.lookup(chars),
                                        return_dict=True),
                             mask=chars_mask)),
            attended_mask=chars_mask).sum()
        batch_size = named_copy(chars.shape[1], "batch_size")
        cost = aggregation.mean(batch_cost, batch_size)
        cost.name = "sequence_log_likelihood"
        logger.info("Cost graph is built")

        # Fetch variables useful for debugging
        max_length = named_copy(chars.shape[0], "max_length")
        cost_per_character = named_copy(
            aggregation.mean(batch_cost, batch_size * max_length),
            "character_log_likelihood")
        cg = ComputationGraph(cost)
        energies = unpack(
            VariableFilter(application=readout.readout,
                           name="output")(cg.variables),
            singleton=True)
        min_energy = named_copy(energies.min(), "min_energy")
        max_energy = named_copy(energies.max(), "max_energy")
        (activations,) = VariableFilter(
            application=generator.transition.apply,
            name="states")(cg.variables)
        mean_activation = named_copy(activations.mean(), "mean_activation")

        # Define the training algorithm.
        algorithm = GradientDescent(
            cost=cost,
            step_rule=CompositeRule([GradientClipping(10.0),
                                     SteepestDescent(0.01)]))

        observables = [
            cost, min_energy, max_energy, mean_activation,
            batch_size, max_length, cost_per_character,
            algorithm.total_step_norm, algorithm.total_gradient_norm]
        for name, param in params.items():
            observables.append(named_copy(
                param.norm(2), name + "_norm"))
            observables.append(named_copy(
                algorithm.gradients[param].norm(2), name + "_grad_norm"))

        main_loop = MainLoop(
            model=bricks,
            data_stream=data_stream,
            algorithm=algorithm,
            extensions=([LoadFromDump(from_dump)] if from_dump else []) +
            [Timing(),
             TrainingDataMonitoring(observables, after_every_batch=True),
             TrainingDataMonitoring(observables, prefix="average",
                                    every_n_batches=10),
             FinishAfter(after_n_batches=num_batches).add_condition(
                 "after_batch",
                 lambda log: math.isnan(
                     log.current_row.total_gradient_norm)),
             Plot(os.path.basename(save_path),
                  [["average_" + cost.name],
                   ["average_" + cost_per_character.name]],
                  every_n_batches=10),
             SerializeMainLoop(save_path, every_n_batches=500,
                               save_separately=["model", "log"]),
             Printing(every_n_batches=1)])
        main_loop.run()
    elif mode == "test":
        with open(save_path, "rb") as source:
            encoder, fork, lookup, generator = dill.load(source)
        logger.info("Model is loaded")
        chars = tensor.lmatrix("features")
        generated = generator.generate(
            n_steps=3 * chars.shape[0], batch_size=chars.shape[1],
            attended=encoder.apply(
                **dict_union(fork.apply(lookup.lookup(chars),
                                        return_dict=True))),
            attended_mask=tensor.ones(chars.shape))
        sample_function = ComputationGraph(generated).get_theano_function()
        logging.info("Sampling function is compiled")
        while True:
            # Python 2-3 compatibility
            line = input("Enter a sentence\n")
            batch_size = int(input("Enter a number of samples\n"))
            encoded_input = [char2code.get(char, char2code["<UNK>"])
                             for char in line.lower().strip()]
            encoded_input = ([char2code['<S>']] + encoded_input +
                             [char2code['</S>']])
            print("Encoder input:", encoded_input)
            target = reverse_words((encoded_input,))[0]
            print("Target: ", target)
            states, samples, glimpses, weights, costs = sample_function(
                numpy.repeat(numpy.array(encoded_input)[:, None],
                             batch_size, axis=1))

            messages = []
            for i in range(samples.shape[1]):
                sample = list(samples[:, i])
                try:
                    true_length = sample.index(char2code['</S>']) + 1
                except ValueError:
                    true_length = len(sample)
                sample = sample[:true_length]
                cost = costs[:true_length, i].sum()
                message = "({})".format(cost)
                message += "".join(code2char[code] for code in sample)
                if sample == target:
                    message += " CORRECT!"
                messages.append((cost, message))
            messages.sort(key=lambda tuple_: -tuple_[0])
            for _, message in messages:
                print(message)
def try_strict():
    list(BatchDataStream(stream, ConstantScheme(2), strict=True)
         .get_epoch_iterator())
def main(mode, save_path, steps, num_batches):
    num_states = MarkovChainDataset.num_states

    if mode == "train":
        # Experiment configuration
        rng = numpy.random.RandomState(1)
        batch_size = 50
        seq_len = 100
        dim = 10
        feedback_dim = 8

        # Build the bricks and initialize them
        transition = GatedRecurrent(name="transition", activation=Tanh(),
                                    dim=dim)
        generator = SequenceGenerator(
            LinearReadout(readout_dim=num_states, source_names=["states"],
                          emitter=SoftmaxEmitter(name="emitter"),
                          feedbacker=LookupFeedback(
                              num_states, feedback_dim, name='feedback'),
                          name="readout"),
            transition,
            weights_init=IsotropicGaussian(0.01), biases_init=Constant(0),
            name="generator")
        generator.push_initialization_config()
        transition.weights_init = Orthogonal()
        generator.initialize()

        # Give an idea of what's going on.
        logger.info("Parameters:\n" + pprint.pformat(
            [(key, value.get_value().shape)
             for key, value in Selector(generator).get_params().items()],
            width=120))
        logger.info("Markov chain entropy: {}".format(
            MarkovChainDataset.entropy))
        logger.info("Expected min error: {}".format(
            -MarkovChainDataset.entropy * seq_len))

        # Build the cost computation graph.
        x = tensor.lmatrix('data')
        cost = aggregation.mean(generator.cost(x[:, :]).sum(), x.shape[1])
        cost.name = "sequence_log_likelihood"

        algorithm = GradientDescent(
            cost=cost,
            params=list(Selector(generator).get_params().values()),
            step_rule=Scale(0.001))
        main_loop = MainLoop(
            algorithm=algorithm,
            data_stream=DataStream(
                MarkovChainDataset(rng, seq_len),
                iteration_scheme=ConstantScheme(batch_size)),
            model=Model(cost),
            extensions=[FinishAfter(after_n_batches=num_batches),
                        TrainingDataMonitoring([cost], prefix="this_step",
                                               after_every_batch=True),
                        TrainingDataMonitoring([cost], prefix="average",
                                               every_n_batches=100),
                        SerializeMainLoop(save_path, every_n_batches=500),
                        Printing(every_n_batches=100)])
        main_loop.run()
    elif mode == "sample":
        main_loop = cPickle.load(open(save_path, "rb"))
        generator = main_loop.model

        sample = ComputationGraph(generator.generate(
            n_steps=steps, batch_size=1,
            iterate=True)).get_theano_function()
        states, outputs, costs = [data[:, 0] for data in sample()]

        numpy.set_printoptions(precision=3, suppress=True)
        print("Generation cost:\n{}".format(costs.sum()))

        freqs = numpy.bincount(outputs).astype(floatX)
        freqs /= freqs.sum()
        print("Frequencies:\n {} vs {}".format(
            freqs, MarkovChainDataset.equilibrium))

        trans_freqs = numpy.zeros((num_states, num_states), dtype=floatX)
        for a, b in zip(outputs, outputs[1:]):
            trans_freqs[a, b] += 1
        trans_freqs /= trans_freqs.sum(axis=1)[:, None]
        print("Transition frequencies:\n{}\nvs\n{}".format(
            trans_freqs, MarkovChainDataset.trans_prob))
    else:
        assert False
def try_strict(strictness):
    return list(
        BatchDataStream(stream, ConstantScheme(2), strictness=strictness)
        .get_epoch_iterator())