def test_integer_sequence_generator():
    # Disclaimer: here we only check shapes, not values.
    readout_dim = 5
    feedback_dim = 3
    dim = 20
    batch_size = 30
    n_steps = 10

    transition = GatedRecurrent(name="transition", activation=Tanh(),
                                dim=dim, weights_init=Orthogonal())
    generator = SequenceGenerator(
        LinearReadout(readout_dim=readout_dim, source_names=["states"],
                      emitter=SoftmaxEmitter(name="emitter"),
                      feedbacker=LookupFeedback(readout_dim, feedback_dim),
                      name="readout"),
        transition,
        weights_init=IsotropicGaussian(0.01), biases_init=Constant(0),
        name="generator")
    generator.initialize()

    y = tensor.lmatrix('y')
    mask = tensor.matrix('mask')
    costs = generator.cost(y, mask)
    assert costs.ndim == 2
    costs_val = theano.function([y, mask], [costs])(
        numpy.zeros((n_steps, batch_size), dtype='int64'),
        numpy.ones((n_steps, batch_size), dtype=floatX))[0]
    assert costs_val.shape == (n_steps, batch_size)

    states, outputs, costs = generator.generate(
        iterate=True, batch_size=batch_size, n_steps=n_steps)
    states_val, outputs_val, costs_val = theano.function(
        [], [states, outputs, costs],
        updates=costs.owner.inputs[0].owner.tag.updates)()
    assert states_val.shape == (n_steps, batch_size, dim)
    assert outputs_val.shape == (n_steps, batch_size)
    assert outputs_val.dtype == 'int64'
    assert costs_val.shape == (n_steps, batch_size)
def test_sequence_generator():
    # Disclaimer: here we only check shapes, not values.
    output_dim = 1
    dim = 20
    batch_size = 30
    n_steps = 10

    transition = GatedRecurrent(name="transition", activation=Tanh(),
                                dim=dim, weights_init=Orthogonal())
    generator = SequenceGenerator(
        LinearReadout(readout_dim=output_dim, source_names=["states"],
                      emitter=TestEmitter(name="emitter"),
                      name="readout"),
        transition,
        weights_init=IsotropicGaussian(0.01), biases_init=Constant(0),
        name="generator")
    generator.initialize()

    y = tensor.tensor3('y')
    mask = tensor.matrix('mask')
    costs = generator.cost(y, mask)
    assert costs.ndim == 2
    costs_val = theano.function([y, mask], [costs])(
        numpy.zeros((n_steps, batch_size, output_dim), dtype=floatX),
        numpy.ones((n_steps, batch_size), dtype=floatX))[0]
    assert costs_val.shape == (n_steps, batch_size)

    states, outputs, costs = [
        variable.eval() for variable in generator.generate(
            iterate=True, batch_size=batch_size, n_steps=n_steps)]
    assert states.shape == (n_steps, batch_size, dim)
    assert outputs.shape == (n_steps, batch_size, output_dim)
    assert costs.shape == (n_steps, batch_size)
class TestGatedRecurrent(unittest.TestCase):
    def setUp(self):
        self.gated = GatedRecurrent(
            dim=3, weights_init=Constant(2),
            activation=Tanh(), gate_activation=Tanh())
        self.gated.initialize()
        self.reset_only = GatedRecurrent(
            dim=3, weights_init=IsotropicGaussian(),
            activation=Tanh(), gate_activation=Tanh(),
            use_update_gate=False, rng=numpy.random.RandomState(1))
        self.reset_only.initialize()

    def test_one_step(self):
        h0 = tensor.matrix('h0')
        x = tensor.matrix('x')
        z = tensor.matrix('z')
        r = tensor.matrix('r')
        h1 = self.gated.apply(x, z, r, h0, iterate=False)
        next_h = theano.function(inputs=[h0, x, z, r], outputs=[h1])

        h0_val = 0.1 * numpy.array([[1, 1, 0], [0, 1, 1]], dtype=floatX)
        x_val = 0.1 * numpy.array([[1, 2, 3], [4, 5, 6]], dtype=floatX)
        zi_val = (h0_val + x_val) / 2
        ri_val = -x_val
        W_val = 2 * numpy.ones((3, 3), dtype=floatX)

        z_val = numpy.tanh(h0_val.dot(W_val) + zi_val)
        r_val = numpy.tanh(h0_val.dot(W_val) + ri_val)
        h1_val = (z_val * numpy.tanh((r_val * h0_val).dot(W_val) + x_val)
                  + (1 - z_val) * h0_val)
        assert_allclose(h1_val, next_h(h0_val, x_val, zi_val, ri_val)[0],
                        rtol=1e-6)

    def test_reset_only_many_steps(self):
        x = tensor.tensor3('x')
        ri = tensor.tensor3('ri')
        mask = tensor.matrix('mask')
        h = self.reset_only.apply(x, reset_inps=ri, mask=mask)
        calc_h = theano.function(inputs=[x, ri, mask], outputs=[h])

        x_val = 0.1 * numpy.asarray(list(itertools.permutations(range(4))),
                                    dtype=floatX)
        x_val = numpy.ones((24, 4, 3), dtype=floatX) * x_val[..., None]
        ri_val = 0.3 - x_val
        mask_val = numpy.ones((24, 4), dtype=floatX)
        mask_val[12:24, 3] = 0
        h_val = numpy.zeros((25, 4, 3), dtype=floatX)
        W = self.reset_only.state_to_state.get_value()
        U = self.reset_only.state_to_reset.get_value()

        for i in range(1, 25):
            r_val = numpy.tanh(h_val[i - 1].dot(U) + ri_val[i - 1])
            h_val[i] = numpy.tanh((r_val * h_val[i - 1]).dot(W)
                                  + x_val[i - 1])
            h_val[i] = (mask_val[i - 1, :, None] * h_val[i]
                        + (1 - mask_val[i - 1, :, None]) * h_val[i - 1])
        h_val = h_val[1:]
        # TODO Figure out why this tolerance needs to be so big
        assert_allclose(h_val, calc_h(x_val, ri_val, mask_val)[0], 1e-03)
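# For reference, a minimal standalone NumPy sketch of the update that
# test_one_step verifies above. Assumptions: the gates use tanh because the
# test configures gate_activation=Tanh() (a standard GRU would use a
# sigmoid), and a single matrix W stands in for the state, gate and
# candidate weights because the test initializes them all with Constant(2).
# The name gated_step is illustrative, not library API; numpy is assumed
# imported, as in the tests above.
def gated_step(h_prev, x, z_in, r_in, W):
    z = numpy.tanh(h_prev.dot(W) + z_in)             # update gate
    r = numpy.tanh(h_prev.dot(W) + r_in)             # reset gate
    h_tilde = numpy.tanh((r * h_prev).dot(W) + x)    # candidate state
    return z * h_tilde + (1 - z) * h_prev            # gated interpolation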
def main():
    logging.basicConfig(
        level=logging.DEBUG,
        format="%(asctime)s: %(name)s: %(levelname)s: %(message)s")

    parser = argparse.ArgumentParser(
        "Case study of generating simple 1d sequences with RNN.",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument(
        "mode", choices=["train", "plot"],
        help="The mode to run. Use `train` to train a new model"
             " and `plot` to plot a sequence generated by an"
             " existing one.")
    parser.add_argument(
        "prefix", default="sine",
        help="The prefix for model, timing and state files")
    parser.add_argument(
        "--input-noise", type=float, default=0.0,
        help="Adds Gaussian noise of given intensity to the"
             " training sequences.")
    parser.add_argument(
        "--function", default="lambda a, x: numpy.sin(a * x)",
        help="An analytical description of the sequence family to learn."
             " The arguments before the last one are considered parameters.")
    parser.add_argument(
        "--steps", type=int, default=100,
        help="Number of steps to plot")
    parser.add_argument("--params", help="Parameter values for plotting")
    args = parser.parse_args()

    function = eval(args.function)
    num_params = len(inspect.getargspec(function).args) - 1

    class Emitter(TrivialEmitter):
        @application
        def cost(self, readouts, outputs):
            """Compute MSE."""
            return ((readouts - outputs) ** 2).sum(axis=readouts.ndim - 1)

    transition = GatedRecurrent(
        name="transition", activation=Tanh(), dim=10,
        weights_init=Orthogonal())
    with_params = AddParameters(transition, num_params, "params",
                                name="with_params")
    generator = SequenceGenerator(
        LinearReadout(readout_dim=1, source_names=["states"],
                      emitter=Emitter(name="emitter"), name="readout"),
        with_params,
        weights_init=IsotropicGaussian(0.01), biases_init=Constant(0),
        name="generator")
    generator.allocate()
    logger.debug("Parameters:\n" +
                 pprint.pformat(Selector(generator).get_params().keys()))

    if args.mode == "train":
        seed = 1
        rng = numpy.random.RandomState(seed)
        batch_size = 10

        generator.initialize()

        cost = Cost(generator.cost(tensor.tensor3('x'),
                                   params=tensor.matrix("params")).sum())
        if args.input_noise:
            cost.apply_noise(cost.inputs, args.input_noise)

        gh_model = GroundhogModel(generator, cost)
        state = GroundhogState(args.prefix, batch_size,
                               learning_rate=0.0001).as_dict()
        data = SeriesIterator(rng, function, 100, batch_size)
        trainer = SGD(gh_model, state, data)
        main_loop = MainLoop(data, None, None, gh_model, trainer, state,
                             None)
        main_loop.load()
        main_loop.main()
    elif args.mode == "plot":
        load_params(generator, args.prefix + "model.npz")

        params = tensor.matrix("params")
        sample = theano.function(
            [params],
            generator.generate(params=params, n_steps=args.steps,
                               batch_size=1))
        param_values = numpy.array(list(map(float, args.params.split())),
                                   dtype=floatX)
        states, outputs, _ = sample(param_values[None, :])
        actual = outputs[:, 0, 0]
        desired = numpy.array([function(*(list(param_values) + [T]))
                               for T in range(args.steps)])
        print("MSE: {}".format(((actual - desired) ** 2).sum()))

        pyplot.plot(numpy.hstack([actual[:, None], desired[:, None]]))
        pyplot.show()
    else:
        assert False
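# Usage sketch (hypothetical: the source does not name this script file).
# Assuming it is saved as series.py, the argparse setup above implies:
#
#   python series.py train sine --function "lambda a, x: numpy.sin(a * x)"
#   python series.py plot sine --steps 100 --params "0.5"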
def main():
    logging.basicConfig(
        level=logging.DEBUG,
        format="%(asctime)s: %(name)s: %(levelname)s: %(message)s")

    parser = argparse.ArgumentParser(
        "Case study of generating a Markov chain with RNN.",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument(
        "mode", choices=["train", "sample"],
        help="The mode to run. Use `train` to train a new model"
             " and `sample` to sample a sequence generated by an"
             " existing one.")
    parser.add_argument(
        "prefix", default="sine",
        help="The prefix for model, timing and state files")
    parser.add_argument(
        "--steps", type=int, default=100,
        help="Number of steps to plot")
    args = parser.parse_args()

    dim = 10
    num_states = ChainIterator.num_states
    feedback_dim = 8

    transition = GatedRecurrent(name="transition", activation=Tanh(),
                                dim=dim)
    generator = SequenceGenerator(
        LinearReadout(readout_dim=num_states, source_names=["states"],
                      emitter=SoftmaxEmitter(name="emitter"),
                      feedbacker=LookupFeedback(num_states, feedback_dim,
                                                name='feedback'),
                      name="readout"),
        transition,
        weights_init=IsotropicGaussian(0.01), biases_init=Constant(0),
        name="generator")
    generator.allocate()
    logger.debug("Parameters:\n" + pprint.pformat(
        [(key, value.get_value().shape) for key, value
         in Selector(generator).get_params().items()],
        width=120))

    if args.mode == "train":
        rng = numpy.random.RandomState(1)
        batch_size = 50

        generator.push_initialization_config()
        transition.weights_init = Orthogonal()
        generator.initialize()
        logger.debug("transition.weights_init={}".format(
            transition.weights_init))

        cost = Cost(generator.cost(tensor.lmatrix('x')).sum())
        gh_model = GroundhogModel(generator, cost)
        state = GroundhogState(args.prefix, batch_size,
                               learning_rate=0.0001).as_dict()
        data = ChainIterator(rng, 100, batch_size)
        trainer = SGD(gh_model, state, data)
        main_loop = MainLoop(data, None, None, gh_model, trainer, state,
                             None)
        main_loop.main()
    elif args.mode == "sample":
        load_params(generator, args.prefix + "model.npz")

        sample = ComputationGraph(generator.generate(
            n_steps=args.steps, batch_size=1, iterate=True)).function()
        states, outputs, costs = [data[:, 0] for data in sample()]

        numpy.set_printoptions(precision=3, suppress=True)
        print("Generation cost:\n{}".format(costs.sum()))

        freqs = numpy.bincount(outputs).astype(floatX)
        freqs /= freqs.sum()
        print("Frequencies:\n {} vs {}".format(freqs,
                                               ChainIterator.equilibrium))

        trans_freqs = numpy.zeros((num_states, num_states), dtype=floatX)
        for a, b in zip(outputs, outputs[1:]):
            trans_freqs[a, b] += 1
        trans_freqs /= trans_freqs.sum(axis=1)[:, None]
        print("Transition frequencies:\n{}\nvs\n{}".format(
            trans_freqs, ChainIterator.trans_prob))
    else:
        assert False
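# Usage sketch (hypothetical: the source does not name this script file).
# Assuming it is saved as markov.py, the argparse setup above implies:
#
#   python markov.py train chain
#   python markov.py sample chain --steps 1000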
def main():
    logging.basicConfig(
        level=logging.DEBUG,
        format="%(asctime)s: %(name)s: %(levelname)s: %(message)s")

    parser = argparse.ArgumentParser(
        "Case study of language modeling with RNN",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument(
        "mode", choices=["train", "sample"],
        help="The mode to run. Use `train` to train a new model"
             " and `sample` to sample a sequence generated by an"
             " existing one.")
    parser.add_argument(
        "prefix", default="sine",
        help="The prefix for model, timing and state files")
    parser.add_argument(
        "state", nargs="?", default="",
        help="Changes to Groundhog state")
    parser.add_argument("--path", help="Path to a language dataset")
    parser.add_argument("--dict", help="Path to the dataset dictionary")
    parser.add_argument(
        "--restart", action="store_true", default=False,
        help="Start anew")
    parser.add_argument(
        "--reset", action="store_true", default=False,
        help="Reset the hidden state between batches")
    parser.add_argument(
        "--steps", type=int, default=100,
        help="Number of steps to plot for the 'sample' mode"
             " OR training sequence length for the 'train' mode.")
    args = parser.parse_args()
    logger.debug("Args:\n" + str(args))

    dim = 200
    num_chars = 50

    transition = GatedRecurrent(
        name="transition", activation=Tanh(), dim=dim,
        weights_init=Orthogonal())
    generator = SequenceGenerator(
        LinearReadout(readout_dim=num_chars, source_names=["states"],
                      emitter=SoftmaxEmitter(name="emitter"),
                      feedbacker=LookupFeedback(num_chars, dim,
                                                name='feedback'),
                      name="readout"),
        transition,
        weights_init=IsotropicGaussian(0.01), biases_init=Constant(0),
        name="generator")
    generator.allocate()
    logger.debug("Parameters:\n" + pprint.pformat(
        [(key, value.get_value().shape) for key, value
         in Selector(generator).get_params().items()],
        width=120))

    if args.mode == "train":
        batch_size = 1
        seq_len = args.steps

        generator.initialize()

        # Build a cost computation graph that uses the saved hidden
        # states. An issue: for Groundhog this is completely transparent,
        # which is why it does not carry the hidden state over the period
        # when validation is done. We should find a way to fix this in
        # the future.
        x = tensor.lmatrix('x')
        init_states = shared_floatx_zeros((batch_size, dim),
                                          name='init_states')
        reset = tensor.scalar('reset')
        cost = Cost(generator.cost(x, states=init_states * reset).sum())
        # TODO: better search routine
        states = [v for v in cost.variables
                  if hasattr(v.tag, 'owner')
                  and v.tag.owner == generator.transition
                  and v.tag.application == generator.transition.apply
                  and v.tag.role == Application.OUTPUT_VARIABLE
                  and v.tag.name == 'states']
        assert len(states) == 1
        states = states[0]

        gh_model = GroundhogModel(generator, cost)
        gh_model.properties.append(
            ('bpc', cost.actual_cost() * numpy.log(2) / seq_len))
        gh_model.properties.append(('mean_init_state', init_states.mean()))
        gh_model.properties.append(('reset', reset))
        if not args.reset:
            gh_model.updates.append((init_states, states[-1]))

        state = GroundhogState(args.prefix, batch_size,
                               learning_rate=0.0001).as_dict()
        changes = eval("dict({})".format(args.state))
        state.update(changes)

        def output_format(x, y, reset):
            return dict(x=x[:, None], reset=reset)
        train, valid, test = [
            LMIterator(batch_size=batch_size,
                       use_infinite_loop=mode == 'train',
                       path=args.path, seq_len=seq_len, mode=mode,
                       chunks='chars', output_format=output_format,
                       can_fit=True)
            for mode in ['train', 'valid', 'test']]

        trainer = SGD(gh_model, state, train)
        state['on_nan'] = 'warn'
        state['cutoff'] = 1.

        main_loop = MainLoop(train, valid, None, gh_model, trainer,
                             state, None)
        if not args.restart:
            main_loop.load()
        main_loop.main()
    elif args.mode == "sample":
        load_params(generator, args.prefix + "model.npz")

        chars = numpy.load(args.dict)['unique_chars']

        sample = ComputationGraph(generator.generate(
            n_steps=args.steps, batch_size=10, iterate=True)).function()
        states, outputs, costs = sample()

        for i in range(10):
            print("Generation cost: {}".format(costs[:, i].sum()))
            print("".join([chars[o] for o in outputs[:, i]]))
    else:
        assert False
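# Usage sketch (hypothetical: neither the script name nor the dataset
# paths appear in the source). Assuming it is saved as lm.py and that a
# character-level dataset and dictionary exist at the illustrative paths:
#
#   python lm.py train wiki --path data/wiki.txt --steps 100
#   python lm.py sample wiki --dict data/wiki_dict.npz --steps 100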