Example #1
	def __init__(self, dimen, vocab_size):
		# Initialize the base Brick; without this call `allocated` is never set
		super(MorphGen, self).__init__()

		# The encoder 
		encoder = Bidirectional(SimpleRecurrent(dim=dimen, activation=Tanh()))

		# Create one input stream per sequential input of the encoder
		# (every named sequence except the mask)
		fork = Fork([name for name in encoder.prototype.apply.sequences if name != 'mask'])
		fork.input_dim = dimen
		fork.output_dims = [encoder.prototype.get_dim(name) for name in fork.input_names]

		lookup = LookupTable(vocab_size, dimen)

		transition = SimpleRecurrent(dim=dimen, activation=Tanh(), name="transition")

		atten = SequenceContentAttention(state_names=transition.apply.states,
			attended_dim=2 * dimen, match_dim=dimen, name="attention")

		readout = Readout(
			readout_dim=vocab_size,
			source_names=[transition.apply.states[0],
			              atten.take_glimpses.outputs[0]],
			emitter=SoftmaxEmitter(name="emitter"),
			feedback_brick=LookupFeedback(vocab_size, dimen),
			name="readout")

		generator = SequenceGenerator(readout=readout, transition=transition, attention=atten,name="generator")
	
		self.lookup = lookup
		self.fork = fork
		self.encoder = encoder
		self.generator = generator
		self.children = [lookup, fork, encoder, generator]
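The class above only wires the bricks together. A cost method in the style of the Blocks reverse_words example would drive them like this (a sketch, not part of the original snippet; it assumes the @application decorator from blocks.bricks.base and dict_union from blocks.utils, and that the fork option is spelled as_dict=True in recent Blocks versions, return_dict=True in older ones):

	@application
	def cost(self, chars, chars_mask, targets, targets_mask):
		# Embed and fork the input, encode it bidirectionally, then let
		# the attentive generator score the target sequence against it.
		return self.generator.cost_matrix(
			targets, targets_mask,
			attended=self.encoder.apply(**dict_union(
				self.fork.apply(self.lookup.apply(chars), as_dict=True),
				mask=chars_mask)),
			attended_mask=chars_mask)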
Example #2
    def __init__(self, dimension, alphabet_size, **kwargs):
        super(WordReverser, self).__init__(**kwargs)
        encoder = Bidirectional(
            SimpleRecurrent(dim=dimension, activation=Tanh()))
        fork = Fork([name for name in encoder.prototype.apply.sequences
                    if name != 'mask'])
        fork.input_dim = dimension
        fork.output_dims = [dimension for name in fork.input_names]
        lookup = LookupTable(alphabet_size, dimension)
        transition = SimpleRecurrent(
            activation=Tanh(),
            dim=dimension, name="transition")
        attention = SequenceContentAttention(
            state_names=transition.apply.states,
            attended_dim=2 * dimension, match_dim=dimension, name="attention")
        readout = Readout(
            readout_dim=alphabet_size,
            source_names=[transition.apply.states[0],
                          attention.take_glimpses.outputs[0]],
            emitter=SoftmaxEmitter(name="emitter"),
            feedback_brick=LookupFeedback(alphabet_size, dimension),
            name="readout")
        generator = SequenceGenerator(
            readout=readout, transition=transition, attention=attention,
            name="generator")

        self.lookup = lookup
        self.fork = fork
        self.encoder = encoder
        self.generator = generator
        self.children = [lookup, fork, encoder, generator]
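In blocks-examples, this WordReverser constructor is paired with cost and generate application methods; the generate method looks roughly like this (a sketch under the same import assumptions as above):

    @application
    def generate(self, chars):
        # Decode for at most 3x the input length, attending to the encoding
        return self.generator.generate(
            n_steps=3 * chars.shape[0],
            batch_size=chars.shape[1],
            attended=self.encoder.apply(**dict_union(
                self.fork.apply(self.lookup.apply(chars), as_dict=True))),
            attended_mask=tensor.ones(chars.shape))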
Example #3
    def __init__(self, dim, mini_dim, summary_dim, **kwargs):
        super(RNNwMini, self).__init__(**kwargs)
        self.dim = dim
        self.mini_dim = mini_dim
        self.summary_dim = summary_dim

        self.recurrent_layer = SimpleRecurrent(
            dim=self.summary_dim,
            activation=Rectifier(),
            name='recurrent_layer',
            weights_init=IsotropicGaussian(),
            biases_init=Constant(0.0))
        self.mini_recurrent_layer = SimpleRecurrent(
            dim=self.mini_dim,
            activation=Rectifier(),
            name='mini_recurrent_layer',
            weights_init=IsotropicGaussian(),
            biases_init=Constant(0.0))

        self.mini_to_main = Linear(self.dim + self.mini_dim,
                                   self.summary_dim,
                                   name='mini_to_main',
                                   weights_init=IsotropicGaussian(),
                                   biases_init=Constant(0.0))
        self.children = [
            self.recurrent_layer, self.mini_recurrent_layer, self.mini_to_main
        ]
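The constructor only registers the children. One plausible apply for this brick (a sketch only: it assumes RNNwMini subclasses BaseRecurrent, that each outer step x of shape (batch, dim) comes with an inner sequence xmini of shape (mini_time, batch, mini_dim), and it uses the @recurrent decorator from blocks.bricks.recurrent) runs the mini RNN per step and summarizes it:

    @recurrent(sequences=['x', 'xmini'], contexts=[],
               states=['states'], outputs=['states'])
    def apply(self, x, xmini, states):
        # Summarize the inner sequence with the mini RNN's final state
        mini_h = self.mini_recurrent_layer.apply(inputs=xmini,
                                                 iterate=True)[-1]
        # Combine the outer input with the summary and take one main step
        combined = tensor.concatenate([x, mini_h], axis=1)
        return self.recurrent_layer.apply(
            inputs=self.mini_to_main.apply(combined),
            states=states, iterate=False)

    def get_dim(self, name):
        dims = {'x': self.dim, 'xmini': self.mini_dim,
                'states': self.summary_dim}
        if name in dims:
            return dims[name]
        return super(RNNwMini, self).get_dim(name)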
Example #4
def test_similar_scans():
    x = tensor.tensor3('x')
    r1 = SimpleRecurrent(activation=Tanh(), dim=10)
    y1 = r1.apply(x)
    r2 = SimpleRecurrent(activation=Tanh(), dim=10)
    y2 = r2.apply(x)
    cg = ComputationGraph([y1, y2])
    assert len(cg.scans) == 2
Example #5
 def __init__(self, dim, **kwargs):
     super(FeedbackRNN, self).__init__(**kwargs)
     self.dim = dim
     self.first_recurrent_layer = SimpleRecurrent(
         dim=self.dim,
         activation=Identity(),
         name='first_recurrent_layer',
         weights_init=initialization.Identity())
     self.second_recurrent_layer = SimpleRecurrent(
         dim=self.dim,
         activation=Identity(),
         name='second_recurrent_layer',
         weights_init=initialization.Identity())
     self.children = [
         self.first_recurrent_layer, self.second_recurrent_layer
     ]
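This __init__ comes from the FeedbackRNN example in the Blocks recurrent-networks tutorial; there it is accompanied by an apply that feeds the second layer's state back into the first layer's input, plus a get_dim (reproduced here from the tutorial as a sketch; @recurrent comes from blocks.bricks.recurrent):

 @recurrent(sequences=['inputs'], contexts=[],
            states=['first_states', 'second_states'],
            outputs=['first_states', 'second_states'])
 def apply(self, inputs, first_states=None, second_states=None):
     # The second layer's previous state is added to the first layer's input
     first_h = self.first_recurrent_layer.apply(
         inputs=inputs, states=first_states + second_states, iterate=False)
     second_h = self.second_recurrent_layer.apply(
         inputs=first_h, states=second_states, iterate=False)
     return first_h, second_h

 def get_dim(self, name):
     return (self.dim if name in ('inputs', 'first_states', 'second_states')
             else super(FeedbackRNN, self).get_dim(name))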
Example #6
def example():
    """ Simple reccurent example. Taken from : https://github.com/mdda/pycon.sg-2015_deep-learning/blob/master/ipynb/blocks-recurrent-docs.ipynb """
    x = tensor.tensor3('x')

    rnn = SimpleRecurrent(dim=3,
                          activation=Identity(),
                          weights_init=initialization.Identity())
    rnn.initialize()
    h = rnn.apply(x)

    f = theano.function([x], h)
    print(f(np.ones((3, 1, 3), dtype=theano.config.floatX)))

    doubler = Linear(input_dim=3,
                     output_dim=3,
                     weights_init=initialization.Identity(2),
                     biases_init=initialization.Constant(0))
    doubler.initialize()
    h_doubler = rnn.apply(doubler.apply(x))

    f = theano.function([x], h_doubler)
    print(f(np.ones((3, 1, 3), dtype=theano.config.floatX)))

    # Initial state
    h0 = tensor.matrix('h0')
    h = rnn.apply(inputs=x, states=h0)

    f = theano.function([x, h0], h)
    print(
        f(np.ones((3, 1, 3), dtype=theano.config.floatX),
          np.ones((1, 3), dtype=theano.config.floatX)))
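For reference: with identity weight matrices and an Identity activation, each step simply adds its input to the previous state, so the three calls print cumulative sums: 1/2/3 per component for plain ones, 2/4/6 for the doubled inputs, and 2/3/4 when starting from an initial state of ones. The first call, for instance, prints:

    # [[[ 1.  1.  1.]]
    #  [[ 2.  2.  2.]]
    #  [[ 3.  3.  3.]]]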
Example #7
    def __init__(self, dimension, alphabet_size, **kwargs):
        super(SimpleGenerator, self).__init__(**kwargs)
        lookup = LookupTable(alphabet_size, dimension)
        transition = SimpleRecurrent(activation=Tanh(),
                                     dim=dimension,
                                     name="transition")
        attention = SequenceContentAttention(
            state_names=transition.apply.states,
            attended_dim=dimension,
            match_dim=dimension,
            name="attention")
        readout = Readout(readout_dim=alphabet_size,
                          source_names=[
                              transition.apply.states[0],
                              attention.take_glimpses.outputs[0]
                          ],
                          emitter=SoftmaxEmitter(name="emitter"),
                          feedback_brick=LookupFeedback(
                              alphabet_size, dimension),
                          name="readout")
        generator = SequenceGenerator(readout=readout,
                                      transition=transition,
                                      attention=attention,
                                      name="generator")

        self.lookup = lookup
        self.generator = generator
        self.children = [lookup, generator]
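Unlike the encoder-based examples above, this generator attends directly to the character embeddings (attended_dim equals the embedding dimension), so a matching cost method would be as simple as this sketch (@application assumed from blocks.bricks.base):

    @application
    def cost(self, chars, chars_mask, targets, targets_mask):
        # Attend straight to the embedded characters; no encoder RNN
        return self.generator.cost_matrix(
            targets, targets_mask,
            attended=self.lookup.apply(chars),
            attended_mask=chars_mask)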
Example #8
def rnn_layer(in_size, dim, x, h, n, first_layer=False):
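    # NB: connect_h_to_h, connect_x_to_h, task, layer_models,
    # layer_resolutions and layer_execution_time_offset are module-level
    # globals in the project this snippet was taken from.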
    if connect_h_to_h == 'all-previous':
        if first_layer:
            rnn_input = x
            linear = Linear(input_dim=in_size,
                            output_dim=dim,
                            name='linear' + str(n) + '-' + str(task))
        elif connect_x_to_h:
            rnn_input = T.concatenate([x] + [hidden for hidden in h], axis=2)
            linear = Linear(input_dim=in_size + dim * n,
                            output_dim=dim,
                            name='linear' + str(n) + '-' + str(task))
        else:
            rnn_input = T.concatenate([hidden for hidden in h], axis=2)
            linear = Linear(input_dim=dim * n,
                            output_dim=dim,
                            name='linear' + str(n) + '-' + str(task))
    elif connect_h_to_h == 'two-previous':
        if first_layer:
            rnn_input = x
            linear = Linear(input_dim=in_size,
                            output_dim=dim,
                            name='linear' + str(n) + '-' + str(task))
        elif connect_x_to_h:
            rnn_input = T.concatenate([x] + h[max(0, n - 2):n], axis=2)
            linear = Linear(input_dim=(in_size + dim * 2 if n > 1
                                       else in_size + dim),
                            output_dim=dim,
                            name='linear' + str(n) + '-' + str(task))
        else:
            rnn_input = T.concatenate(h[max(0, n - 2):n], axis=2)
            linear = Linear(input_dim=dim * 2 if n > 1 else dim,
                            output_dim=dim,
                            name='linear' + str(n) + '-' + str(task))
    elif connect_h_to_h == 'one-previous':
        if first_layer:
            rnn_input = x
            linear = Linear(input_dim=in_size,
                            output_dim=dim,
                            name='linear' + str(n) + '-' + str(task))
        elif connect_x_to_h:
            rnn_input = T.concatenate([x] + [h[n - 1]], axis=2)
            linear = Linear(input_dim=in_size + dim,
                            output_dim=dim,
                            name='linear' + str(n) + '-' + str(task))
        else:
            rnn_input = h[n]
            linear = Linear(input_dim=dim,
                            output_dim=dim,
                            name='linear' + str(n) + '-' + str(task))
    rnn = SimpleRecurrent(dim=dim,
                          activation=Tanh(),
                          name=layer_models[n] + str(n) + '-' + str(task))
    initialize([linear, rnn])
    if layer_models[n] == 'rnn':
        return rnn.apply(linear.apply(rnn_input))
    elif layer_models[n] == 'mt_rnn':
        return rnn.apply(linear.apply(rnn_input),
                         time_scale=layer_resolutions[n],
                         time_offset=layer_execution_time_offset[n])
Example #9
def rnn_layer(in_dim, h, h_dim, n):
    linear = Linear(input_dim=in_dim,
                    output_dim=h_dim,
                    name='linear' + str(n) + h.name)
    rnn = SimpleRecurrent(dim=h_dim, activation=Tanh(), name='rnn' + str(n))
    initialize([linear, rnn])
    return rnn.apply(linear.apply(h))
Example #10
def build_model_vanilla(args, dtype=floatX):
    logger.info('Building model ...')

    # Return list of 3D Tensor, one for each layer
    # (Time X Batch X embedding_dim)
    pre_rnn, x_mask = get_prernn(args)

    transitions = [
        SimpleRecurrent(dim=args.state_dim, activation=Tanh())
        for _ in range(args.layers)
    ]

    rnn = RecurrentStack(transitions, skip_connections=args.skip_connections)
    initialize_rnn(rnn, args)

    # Prepare inputs and initial states for the RNN
    kwargs, inits = get_rnn_kwargs(pre_rnn, args)

    # Apply the RNN to the inputs
    h = rnn.apply(low_memory=True, mask=x_mask, **kwargs)

    # We have
    # h = [state, state_1, state_2 ...] if args.layers > 1
    # h = state if args.layers == 1

    # If we have skip connections, concatenate all the states
    # Else only consider the state of the highest layer
    last_states = {}
    hidden_states = []
    if args.layers > 1:
        # Save all the last states
        for d in range(args.layers):
            # TODO correct bug
            # h[d] = h[d] * x_mask
            last_states[d] = h[d][-1, :, :]
            h[d].name = "hidden_state_" + str(d)
            hidden_states.append(h[d])
        if args.skip_connections or args.skip_output:
            h = tensor.concatenate(h, axis=2)
        else:
            h = h[-1]
    else:
        # TODO correct bug
        # hidden_states.append(h * x_mask)
        hidden_states.append(h)
        hidden_states[0].name = "hidden_state_0"
        # Note: if we have mask, then updating initial state
        # with last state does not make sense anymore.
        last_states[0] = h[-1, :, :]

    # The updates of the hidden states
    updates = []
    for d in range(args.layers):
        updates.append((inits[0][d], last_states[d]))

    presoft = get_presoft(h, args)

    cost, unregularized_cost = get_costs(presoft, args)

    return cost, unregularized_cost, updates, hidden_states
Example #11
def rnn_layer(in_dim, h, h_dim, n, pref=""):
    linear = Linear(input_dim=in_dim,
                    output_dim=h_dim,
                    name='linear' + str(n) + pref)
    rnn = SimpleRecurrent(dim=h_dim,
                          activation=Tanh(),
                          name='rnn' + str(n) + pref)
    initialize([linear, rnn])
    return rnn.apply(linear.apply(h))
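A hypothetical use of this helper (all names and dimensions here are assumptions) stacks layers by feeding one layer's state sequence to the next:

embedding_dim, hidden_dim = 100, 200
embedded = tensor.tensor3('embedded')  # (time, batch, embedding_dim)
h1 = rnn_layer(embedding_dim, embedded, hidden_dim, 0, pref='_l0')
h2 = rnn_layer(hidden_dim, h1, hidden_dim, 1, pref='_l1')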
Example #12
def example5():
    """Bidir + simplereccurent. Adaptation from a unittest in blocks """

    bidir = Bidirectional(weights_init=Orthogonal(),
                          prototype=SimpleRecurrent(dim=3, activation=Tanh()))

    simple = SimpleRecurrent(dim=3,
                             weights_init=Orthogonal(),
                             activation=Tanh(),
                             seed=1)

    bidir.allocate()
    simple.initialize()

    bidir.children[0].parameters[0].set_value(simple.parameters[0].get_value())

    bidir.children[1].parameters[0].set_value(simple.parameters[0].get_value())

    # Initialize Theano variables and functions
    x = tensor.tensor3('x')
    mask = tensor.matrix('mask')

    calc_bidir = theano.function([x, mask], [bidir.apply(x, mask=mask)])
    calc_simple = theano.function([x, mask], [simple.apply(x, mask=mask)])

    # Testing time

    x_val = 0.1 * np.asarray(list(itertools.permutations(range(4))),
                             dtype=theano.config.floatX)

    x_val = (np.ones(
        (24, 4, 3), dtype=theano.config.floatX) * x_val[..., None])

    mask_val = np.ones((24, 4), dtype=theano.config.floatX)
    mask_val[12:24, 3] = 0

    h_bidir = calc_bidir(x_val, mask_val)[0]
    h_simple = calc_simple(x_val, mask_val)[0]
    h_simple_rev = calc_simple(x_val[::-1], mask_val[::-1])[0]

    print(h_bidir)
    print(h_simple)
    print(h_simple_rev)
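The Blocks unit test this was adapted from ends by checking that the forward half of the bidirectional output matches the plain run and the backward half matches the reversed run; the equivalent checks here would be:

    assert np.allclose(h_simple, h_bidir[..., :3], rtol=1e-4)
    assert np.allclose(h_simple_rev, h_bidir[::-1, ..., 3:], rtol=1e-4)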
Example #13
 def setUp(self):
     self.bidir = Bidirectional(weights_init=Orthogonal(),
                                prototype=SimpleRecurrent(
                                    dim=3, activation=Tanh()))
     self.simple = SimpleRecurrent(dim=3, weights_init=Orthogonal(),
                                   activation=Tanh(), seed=1)
     self.bidir.allocate()
     self.simple.initialize()
     self.bidir.children[0].parameters[0].set_value(
         self.simple.parameters[0].get_value())
     self.bidir.children[1].parameters[0].set_value(
         self.simple.parameters[0].get_value())
     self.x_val = 0.1 * numpy.asarray(
         list(itertools.permutations(range(4))),
         dtype=theano.config.floatX)
     self.x_val = (numpy.ones((24, 4, 3), dtype=theano.config.floatX) *
                   self.x_val[..., None])
     self.mask_val = numpy.ones((24, 4), dtype=theano.config.floatX)
     self.mask_val[12:24, 3] = 0
Example #14
    def _build_bricks(self, *args, **kwargs):
        # Build lookup tables
        self.word_embed = self._embed(len(self.dataset.word2index),
                                      self.config.word_embed_dim,
                                      name='word_embed')

        self.hashtag_embed = self._embed(len(self.dataset.hashtag2index),
                                         self.config.lstm_dim,
                                         name='hashtag_embed')
        # Build text encoder
        self.mlstm_ins = Linear(input_dim=self.config.word_embed_dim,
                                output_dim=4 * self.config.lstm_dim,
                                name='mlstm_in')
        self.mlstm_ins.weights_init = IsotropicGaussian(
            std=numpy.sqrt(2) /
            numpy.sqrt(self.config.word_embed_dim + self.config.lstm_dim))
        self.mlstm_ins.biases_init = Constant(0)
        self.mlstm_ins.initialize()
        self.mlstm = MLSTM(self.config.lstm_time,
                           self.config.lstm_dim,
                           shared=False)
        self.mlstm.weights_init = IsotropicGaussian(
            std=numpy.sqrt(2) /
            numpy.sqrt(self.config.word_embed_dim + self.config.lstm_dim))
        self.mlstm.biases_init = Constant(0)
        self.mlstm.initialize()
        self.hashtag2word = MLP(
            activations=[Tanh('hashtag2word_tanh')],
            dims=[self.config.lstm_dim, self.config.word_embed_dim],
            name='hashtag2word_mlp')
        self.hashtag2word.weights_init = IsotropicGaussian(
            std=1 / numpy.sqrt(self.config.word_embed_dim))
        self.hashtag2word.biases_init = Constant(0)
        self.hashtag2word.initialize()
        self.hashtag2word_bias = Bias(dim=1, name='hashtag2word_bias')
        self.hashtag2word_bias.biases_init = Constant(0)
        self.hashtag2word_bias.initialize()
        # Build character embedding
        self.char_embed = self._embed(len(self.dataset.char2index),
                                      self.config.char_embed_dim,
                                      name='char_embed')
        # Build sparse word encoder
        self.rnn_ins = Linear(input_dim=self.config.char_embed_dim,
                              output_dim=self.config.word_embed_dim,
                              name='rnn_in')
        self.rnn_ins.weights_init = IsotropicGaussian(
            std=numpy.sqrt(2) / numpy.sqrt(self.config.char_embed_dim +
                                           self.config.word_embed_dim))
        self.rnn_ins.biases_init = Constant(0)
        self.rnn_ins.initialize()
        self.rnn = SimpleRecurrent(dim=self.config.word_embed_dim,
                                   activation=Tanh())
        self.rnn.weights_init = IsotropicGaussian(
            std=1 / numpy.sqrt(self.config.word_embed_dim))
        self.rnn.initialize()
Example #15
def rnn_layer(dim, h, n, x_mask, first, **kwargs):
    linear = Linear(input_dim=dim, output_dim=dim, name='linear' + str(n))
    rnn = SimpleRecurrent(dim=dim, activation=Rectifier(), name='rnn' + str(n))
    initialize([linear, rnn])
    applyLin = linear.apply(h)
    if first:
        rnnApply = rnn.apply(applyLin, mask=x_mask, **kwargs)
    else:
        rnnApply = rnn.apply(applyLin, **kwargs)

    return rnnApply
Example #16
    def __init__(self, dim, depth, **kwargs):
        super(FeedbackRNNStack, self).__init__(**kwargs)
        self.dim = dim
        self.depth = depth
        self.children = []
        FeedbackRNNStack.depth = depth

        for i in range(depth):
            self.children.append(
                SimpleRecurrent(dim=self.dim,
                                activation=Identity(),
                                name=str(i) + 'th_recurrent_layer',
                                weights_init=initialization.Identity()))
Example #17
def create_rnn(hidden_dim, vocab_dim, mode="rnn"):
    # input
    x = tensor.imatrix('inchar')
    y = tensor.imatrix('outchar')

    #
    W = LookupTable(
        name="W1",
        #dim = hidden_dim*4,
        dim=hidden_dim,
        length=vocab_dim,
        weights_init=initialization.IsotropicGaussian(0.01),
        biases_init=initialization.Constant(0))
    if mode == "lstm":
        # Long Short Term Memory
        H = LSTM(hidden_dim,
                 name='H',
                 weights_init=initialization.IsotropicGaussian(0.01),
                 biases_init=initialization.Constant(0.0))
    else:
        # recurrent history weight
        H = SimpleRecurrent(
            name="H",
            dim=hidden_dim,
            activation=Tanh(),
            weights_init=initialization.IsotropicGaussian(0.01))
    #
    S = Linear(name="W2",
               input_dim=hidden_dim,
               output_dim=vocab_dim,
               weights_init=initialization.IsotropicGaussian(0.01),
               biases_init=initialization.Constant(0))

    A = NDimensionalSoftmax(name="softmax")

    initLayers([W, H, S])
    activations = W.apply(x)
    hiddens = H.apply(activations)  # NB: the LSTM branch returns (states, cells); take [0] for states
    activations2 = S.apply(hiddens)
    y_hat = A.apply(activations2, extra_ndim=1)
    cost = A.categorical_cross_entropy(y, activations2, extra_ndim=1).mean()

    cg = ComputationGraph(cost)
    #print VariableFilter(roles=[WEIGHT])(cg.variables)
    #W1,H,W2 = VariableFilter(roles=[WEIGHT])(cg.variables)

    layers = (x, W, H, S, A, y)

    return cg, layers, y_hat, cost
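A hypothetical way to use the returned graph (the dimensions are made up) is to unpack the symbolic input from layers and compile a prediction function:

cg, layers, y_hat, cost = create_rnn(hidden_dim=100, vocab_dim=50)
x, W, H, S, A, y = layers
predict = theano.function([x], y_hat)  # per-position distribution over chars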
Example #18
    def __init__(self, dimension, input_size, embed_input=False, **kwargs):
        super(SimpleEncoder, self).__init__(**kwargs)
        if embed_input:
            self.embedder = LookupTable(input_size, dimension)
        else:
            self.embedder = Linear(input_size, dimension)
        self.transform = MLP([Tanh()], [dimension, dimension])
        self.fork = Fork(['inputs'],
                         dimension,
                         output_dims=[dimension],
                         prototype=Linear(dimension, dimension))
        encoder = Bidirectional(
            SimpleRecurrent(dim=dimension, activation=Tanh()))

        self.encoder = encoder
        self.children = [encoder, self.embedder, self.transform, self.fork]
Example #19
    def __init__(self, dims=(88, 100, 100), **kwargs):
        super(Rnn, self).__init__(**kwargs)
        self.dims = dims

        self.input_transform = Linear(
            input_dim=dims[0],
            output_dim=dims[1],
            weights_init=IsotropicGaussian(0.01),
            # biases_init=Constant(0.0),
            use_bias=False,
            name="input_transfrom")

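        # NB: despite its name, this layer is a SimpleRecurrent, not a GRU.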
        self.gru_layer = SimpleRecurrent(dim=dims[1],
                                         activation=Tanh(),
                                         weights_init=IsotropicGaussian(0.01),
                                         biases_init=Constant(0.0),
                                         use_bias=True,
                                         name="gru_rnn_layer")

        # TODO: find a way to automatically set the output dim in case of lstm vs normal rnn
        self.linear_trans = Linear(input_dim=dims[1],
                                   output_dim=dims[2] * 4,
                                   weights_init=IsotropicGaussian(0.01),
                                   biases_init=Constant(0.0),
                                   use_bias=False,
                                   name="h2h_transform")

        self.lstm_layer = LSTM(dim=dims[2],
                               activation=Tanh(),
                               weights_init=IsotropicGaussian(0.01),
                               biases_init=Constant(0.0),
                               use_bias=True,
                               name="lstm_rnn_layer")

        self.out_transform = MLP(activations=[Sigmoid()],
                                 dims=[dims[2], dims[0]],
                                 weights_init=IsotropicGaussian(0.01),
                                 use_bias=True,
                                 biases_init=Constant(0.0),
                                 name="out_layer")

        self.children = [
            self.input_transform, self.gru_layer, self.linear_trans,
            self.lstm_layer, self.out_transform
        ]
Example #20
    def setUp(self):
        prototype = SimpleRecurrent(dim=3, activation=Tanh())
        self.layers = [
            Bidirectional(weights_init=Orthogonal(), prototype=prototype)
            for _ in range(3)
        ]
        self.stack = RecurrentStack(self.layers)
        for fork in self.stack.forks:
            fork.weights_init = Identity(1)
            fork.biases_init = Constant(0)
        self.stack.initialize()

        self.x_val = 0.1 * numpy.asarray(
            list(itertools.permutations(range(4))), dtype=theano.config.floatX)
        self.x_val = (numpy.ones(
            (24, 4, 3), dtype=theano.config.floatX) * self.x_val[..., None])
        self.mask_val = numpy.ones((24, 4), dtype=theano.config.floatX)
        self.mask_val[12:24, 3] = 0
Example #21
def test_saved_inner_graph():
    """Make sure that the original inner graph is saved."""
    x = tensor.tensor3()
    recurrent = SimpleRecurrent(dim=3, activation=Tanh())
    y = recurrent.apply(x)

    application_call = get_application_call(y)
    assert application_call.inner_inputs
    assert application_call.inner_outputs

    cg = ComputationGraph(application_call.inner_outputs)
    # Check that the inner scan graph is annotated
    # with `recurrent.apply`
    assert len(VariableFilter(applications=[recurrent.apply])(cg)) == 3
    # Check that the inner graph is equivalent to the one
    # produced by a stand-alone application of `recurrent.apply`
    assert is_same_graph(application_call.inner_outputs[0],
                         recurrent.apply(*application_call.inner_inputs,
                                         iterate=False))
Example #22
# So let's think about the sizes of the arrays...

x = tensor.matrix('tokens', dtype="int32")

x_mask = tensor.matrix('tokens_mask', dtype=floatX)
#rnn.apply(inputs=input_to_hidden.apply(x), mask=x_mask)

lookup = LookupTable(vocab_size, embedding_dim)

x_extra = tensor.tensor3('extras', dtype=floatX)

rnn = Bidirectional(
    SimpleRecurrent(
        dim=hidden_dim,
        activation=Tanh(),
        weights_init=IsotropicGaussian(0.01),
        biases_init=Constant(0),
    ))

### Will need to reshape the rnn outputs to produce suitable input here...
gather = Linear(name='hidden_to_output',
                input_dim=hidden_dim * 2,
                output_dim=labels_size,
                weights_init=IsotropicGaussian(0.01),
                biases_init=Constant(0))

p_labels = Softmax()

## Let's initialize the variables
lookup.allocate()
#print("lookup.parameters=", lookup.parameters)                         # ('lookup.parameters=', [W])
Example #23
 def setUp(self):
     self.simple = SimpleRecurrent(dim=3, weights_init=Constant(2),
                                   activation=Tanh())
     self.simple.initialize()
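The Blocks test suite pairs this fixture with a one-step check: with every weight set to 2, a single non-iterating application must equal tanh(x + h0 . W). A sketch of that check (assert_allclose from numpy.testing assumed):

 def test_one_step(self):
     h0 = tensor.matrix('h0')
     x = tensor.matrix('x')
     h1 = self.simple.apply(x, h0, iterate=False)
     next_h = theano.function(inputs=[x, h0], outputs=[h1])
     h0_val = 0.1 * numpy.array([[1, 1, 0], [0, 1, 1]],
                                dtype=theano.config.floatX)
     x_val = 0.1 * numpy.array([[1, 2, 3], [4, 5, 6]],
                               dtype=theano.config.floatX)
     # One step is h1 = tanh(x + h0.W), with W a 3x3 matrix of twos
     expected = numpy.tanh(h0_val.dot(2 * numpy.ones((3, 3))) + x_val)
     assert_allclose(next_h(x_val, h0_val)[0], expected, rtol=1e-6)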
Example #24
    def __init__(self, input_sources_list, input_sources_vocab_size_list,
                 output_source, output_source_vocab_size,
                 lookup_dim=200, hidden_size=256, recurrent_stack_size=1):

        self.InputSources = input_sources_list
        self.InputSourcesVocab = input_sources_vocab_size_list
        self.OutputSource = output_source
        self.OutputSourceVocab = output_source_vocab_size

        inputs = [tensor.lmatrix(source) for source in input_sources_list]
        output = tensor.lmatrix(output_source)

        lookups = self.get_lookups(lookup_dim, input_sources_vocab_size_list)

        for lookup in lookups:
            lookup.initialize()

        merge = Merge([lookup.name for lookup in lookups],
                      [lookup.dim for lookup in lookups], hidden_size,
                      weights_init=initialization.Uniform(width=0.01),
                      biases_init=Constant(0))
        merge.initialize()

        linear0 = Linear(input_dim=hidden_size, output_dim=hidden_size,
                         weights_init=initialization.Uniform(width=0.01),
                         biases_init=Constant(0), name='linear0')
        linear0.initialize()

        recurrent_blocks = []

        for i in range(recurrent_stack_size):
            recurrent_blocks.append(SimpleRecurrent(
                dim=hidden_size, activation=Tanh(),
                weights_init=initialization.Uniform(width=0.01),
                use_bias=False))

        for i, recurrent_block in enumerate(recurrent_blocks):
            recurrent_block.name = 'recurrent'+str(i+1)
            recurrent_block.initialize()

        linear_out = Linear(input_dim=hidden_size,
                            output_dim=output_source_vocab_size,
                            weights_init=initialization.Uniform(width=0.01),
                            biases_init=Constant(0), name='linear_out')
        linear_out.initialize()
        softmax = NDimensionalSoftmax(name='softmax')

        lookup_outputs = [lookup.apply(input) for lookup, input in zip(lookups, inputs)]

        m = merge.apply(*lookup_outputs)
        r = linear0.apply(m)
        for block in recurrent_blocks:
            r = block.apply(r)
        a = linear_out.apply(r)

        self.Cost = softmax.categorical_cross_entropy(output, a, extra_ndim=1).mean()
        self.Cost.name = 'cost'

        y_hat = softmax.apply(a, extra_ndim=1)
        y_hat.name = 'y_hat'

        self.ComputationGraph = ComputationGraph(self.Cost)

        self.Function = None
        self.MainLoop = None
        self.Model = Model(y_hat)
Example #25
def build_model_hard(vocab_size, args, dtype=floatX):
    logger.info('Building model ...')

    # Parameters for the model
    context = args.context
    state_dim = args.state_dim
    layers = args.layers
    skip_connections = args.skip_connections

    # Symbolic variables
    # In both cases: Time X Batch
    x = tensor.lmatrix('features')
    y = tensor.lmatrix('targets')

    # Build the model
    output_names = []
    output_dims = []
    for d in range(layers):
        if d > 0:
            suffix = '_' + str(d)
        else:
            suffix = ''
        if d == 0 or skip_connections:
            output_names.append("inputs" + suffix)
            output_dims.append(state_dim)

    lookup = LookupTable(length=vocab_size, dim=state_dim)
    lookup.weights_init = initialization.IsotropicGaussian(0.1)
    lookup.biases_init = initialization.Constant(0)

    fork = Fork(output_names=output_names,
                input_dim=args.mini_batch_size,
                output_dims=output_dims,
                prototype=FeedforwardSequence([lookup.apply]))

    transitions = [SimpleRecurrent(dim=state_dim, activation=Tanh())]
    for i in range(layers - 1):
        mlp = MLP(activations=[Logistic()],
                  dims=[2 * state_dim, 1],
                  weights_init=initialization.IsotropicGaussian(0.1),
                  biases_init=initialization.Constant(0),
                  name="mlp_" + str(i))
        transitions.append(
            HardGatedRecurrent(dim=state_dim, mlp=mlp, activation=Tanh()))

    rnn = RecurrentStack(transitions, skip_connections=skip_connections)

    # dim = layers * state_dim
    output_layer = Linear(input_dim=layers * state_dim,
                          output_dim=vocab_size,
                          name="output_layer")

    # Return list of 3D Tensor, one for each layer
    # (Time X Batch X embedding_dim)
    pre_rnn = fork.apply(x)

    # Give a name to the input of each layer
    if skip_connections:
        for t in range(len(pre_rnn)):
            pre_rnn[t].name = "pre_rnn_" + str(t)
    else:
        pre_rnn.name = "pre_rnn"

    # Prepare inputs for the RNN
    kwargs = OrderedDict()
    init_states = {}
    for d in range(layers):
        if d > 0:
            suffix = '_' + str(d)
        else:
            suffix = ''
        if skip_connections:
            kwargs['inputs' + suffix] = pre_rnn[d]
        elif d == 0:
            kwargs['inputs' + suffix] = pre_rnn
        init_states[d] = theano.shared(numpy.zeros(
            (args.mini_batch_size, state_dim)).astype(floatX),
                                       name='state0_%d' % d)
        kwargs['states' + suffix] = init_states[d]

    # Apply the RNN to the inputs
    h = rnn.apply(low_memory=True, **kwargs)

    # Now we have correctly:
    # h = [state_1, state_2, state_3 ...]

    # Save all the last states
    last_states = {}
    for d in range(layers):
        last_states[d] = h[d][-1, :, :]

    # Concatenate all the states
    if layers > 1:
        h = tensor.concatenate(h, axis=2)
    h.name = "hidden_state"

    # The updates of the hidden states
    updates = []
    for d in range(layers):
        updates.append((init_states[d], last_states[d]))

    presoft = output_layer.apply(h[context:, :, :])
    # Define the cost
    # Compute the probability distribution
    time, batch, feat = presoft.shape
    presoft.name = 'presoft'

    cross_entropy = Softmax().categorical_cross_entropy(
        y[context:, :].flatten(), presoft.reshape((batch * time, feat)))
    cross_entropy = cross_entropy / tensor.log(2)
    cross_entropy.name = "cross_entropy"

    # TODO: add regularisation for the cost
    # the log(1) is here in order to differentiate the two variables
    # for monitoring
    cost = cross_entropy + tensor.log(1)
    cost.name = "regularized_cost"

    # Initialize the model
    logger.info('Initializing...')

    fork.initialize()

    rnn.weights_init = initialization.Orthogonal()
    rnn.biases_init = initialization.Constant(0)
    rnn.initialize()

    output_layer.weights_init = initialization.IsotropicGaussian(0.1)
    output_layer.biases_init = initialization.Constant(0)
    output_layer.initialize()

    return cost, cross_entropy, updates
Example #26
def test_sequence_generator():
    """Test a sequence generator with no contexts and continuous outputs.

    Such sequence generators can be used to model e.g. dynamical systems.

    """
    rng = numpy.random.RandomState(1234)

    output_dim = 1
    dim = 20
    batch_size = 30
    n_steps = 10

    transition = SimpleRecurrent(activation=Tanh(), dim=dim,
                                 weights_init=Orthogonal())
    generator = SequenceGenerator(
        Readout(readout_dim=output_dim, source_names=["states"],
                emitter=TestEmitter()),
        transition,
        weights_init=IsotropicGaussian(0.1), biases_init=Constant(0.0),
        seed=1234)
    generator.initialize()

    # Test 'cost_matrix' method
    y = tensor.tensor3('y')
    mask = tensor.matrix('mask')
    costs = generator.cost_matrix(y, mask)
    assert costs.ndim == 2
    y_test = rng.uniform(size=(n_steps, batch_size, output_dim)).astype(floatX)
    m_test = numpy.ones((n_steps, batch_size), dtype=floatX)
    costs_val = theano.function([y, mask], [costs])(y_test, m_test)[0]
    assert costs_val.shape == (n_steps, batch_size)
    assert_allclose(costs_val.sum(), 115.593, rtol=1e-5)

    # Test 'cost' method
    cost = generator.cost(y, mask)
    assert cost.ndim == 0
    cost_val = theano.function([y, mask], [cost])(y_test, m_test)
    assert_allclose(cost_val, 3.8531, rtol=1e-5)

    # Test 'AUXILIARY' variable 'per_sequence_element' in 'cost' method
    cg = ComputationGraph([cost])
    var_filter = VariableFilter(roles=[AUXILIARY])
    aux_var_name = '_'.join([generator.name, generator.cost.name,
                             'per_sequence_element'])
    cost_per_el = [el for el in var_filter(cg.variables)
                   if el.name == aux_var_name][0]
    assert cost_per_el.ndim == 0
    cost_per_el_val = theano.function([y, mask], [cost_per_el])(y_test, m_test)
    assert_allclose(cost_per_el_val, 0.38531, rtol=1e-5)

    # Test 'generate' method
    states, outputs, costs = [variable.eval() for variable in
                              generator.generate(
                                  states=rng.uniform(
                                      size=(batch_size, dim)).astype(floatX),
                                  iterate=True, batch_size=batch_size,
                                  n_steps=n_steps)]
    assert states.shape == (n_steps, batch_size, dim)
    assert outputs.shape == (n_steps, batch_size, output_dim)
    assert costs.shape == (n_steps, batch_size)
    assert_allclose(outputs.sum(), -0.33683, rtol=1e-5)
    assert_allclose(states.sum(), 15.7909, rtol=1e-5)
    # There is no generation cost in this case, since generation is
    # deterministic
    assert_allclose(costs.sum(), 0.0)
Example #27
def test_attention_recurrent():
    rng = numpy.random.RandomState(1234)

    dim = 5
    batch_size = 4
    input_length = 20

    attended_dim = 10
    attended_length = 15

    wrapped = SimpleRecurrent(dim, Identity())
    attention = SequenceContentAttention(state_names=wrapped.apply.states,
                                         attended_dim=attended_dim,
                                         match_dim=attended_dim)
    recurrent = AttentionRecurrent(wrapped, attention, seed=1234)
    recurrent.weights_init = IsotropicGaussian(0.5)
    recurrent.biases_init = Constant(0)
    recurrent.initialize()

    attended = tensor.tensor3("attended")
    attended_mask = tensor.matrix("attended_mask")
    inputs = tensor.tensor3("inputs")
    inputs_mask = tensor.matrix("inputs_mask")
    outputs = recurrent.apply(inputs=inputs,
                              mask=inputs_mask,
                              attended=attended,
                              attended_mask=attended_mask)
    states, glimpses, weights = outputs
    assert states.ndim == 3
    assert glimpses.ndim == 3
    assert weights.ndim == 3

    # For values.
    def rand(size):
        return rng.uniform(size=size).astype(floatX)

    # For masks.
    def generate_mask(length, batch_size):
        mask = numpy.ones((length, batch_size), dtype=floatX)
        # To make it look like read data
        for i in range(batch_size):
            mask[1 + rng.randint(0, length - 1):, i] = 0.0
        return mask

    input_vals = rand((input_length, batch_size, dim))
    input_mask_vals = generate_mask(input_length, batch_size)
    attended_vals = rand((attended_length, batch_size, attended_dim))
    attended_mask_vals = generate_mask(attended_length, batch_size)

    func = theano.function([inputs, inputs_mask, attended, attended_mask],
                           [states, glimpses, weights])
    states_vals, glimpses_vals, weight_vals = func(input_vals, input_mask_vals,
                                                   attended_vals,
                                                   attended_mask_vals)
    assert states_vals.shape == (input_length, batch_size, dim)
    assert glimpses_vals.shape == (input_length, batch_size, attended_dim)

    assert (len(ComputationGraph(outputs).shared_variables) == len(
        Selector(recurrent).get_params()))

    # weights for masked positions must be zero
    assert numpy.all(weight_vals * (1 - attended_mask_vals.T) == 0)
    # weights for unmasked positions must be non-zero
    assert numpy.all(abs(weight_vals + (1 - attended_mask_vals.T)) > 1e-5)
    # weights from different steps should be noticeably different
    assert (abs(weight_vals[0] - weight_vals[1])).sum() > 1e-2
    # weights for all steps after the last unmasked position should be the same
    for i in range(batch_size):
        last = int(input_mask_vals[:, i].sum())
        for j in range(last, input_length):
            assert_allclose(weight_vals[last, i], weight_vals[j, i])

    # freeze sums
    assert_allclose(weight_vals.sum(), input_length * batch_size, 1e-5)
    assert_allclose(states_vals.sum(), 113.429, rtol=1e-5)
    assert_allclose(glimpses_vals.sum(), 415.901, rtol=1e-5)
Example #28
lookup_input = LookupTable(name='lookup_input',
                           length=train_dataset.syllables_vocab_size() + 1,
                           dim=hidden_layer_dim,
                           weights_init=initialization.Uniform(width=0.01),
                           biases_init=Constant(0))
lookup_input.initialize()

linear_input = Linear(name='linear_input',
                      input_dim=hidden_layer_dim,
                      output_dim=hidden_layer_dim,
                      weights_init=initialization.Uniform(width=0.01),
                      biases_init=Constant(0))
linear_input.initialize()

rnn = SimpleRecurrent(name='hidden',
                      dim=hidden_layer_dim,
                      activation=Tanh(),
                      weights_init=initialization.Uniform(width=0.01))
rnn.initialize()

linear_output = Linear(name='linear_output',
                       input_dim=hidden_layer_dim,
                       output_dim=train_dataset.durations_vocab_size(),
                       weights_init=initialization.Uniform(width=0.01),
                       biases_init=Constant(0))
linear_output.initialize()

softmax = NDimensionalSoftmax(name='ndim_softmax')

activation_input = lookup_input.apply(x)
hidden = rnn.apply(linear_input.apply(activation_input))
activation_output = linear_output.apply(hidden)
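The snippet stops after wiring the layers; a plausible continuation (following Examples #17 and #24, and assuming y is the target matrix defined alongside x) scores the predicted durations with the NDimensionalSoftmax:

cost = softmax.categorical_cross_entropy(y, activation_output,
                                         extra_ndim=1).mean()
cost.name = 'cost'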
Example #29
floatX = theano.config.floatX

n_epochs = 30
x_dim = 1
h_dim = 100
o_dim = 10
batch_size = 50

print('Building model ...')
# T x B x F
x = tensor.tensor3('x', dtype=floatX)
y = tensor.tensor3('y', dtype='int32')

x_to_h1 = Linear(name='x_to_h1', input_dim=x_dim, output_dim=h_dim)
pre_rnn = x_to_h1.apply(x)
rnn = SimpleRecurrent(activation=Rectifier(), dim=h_dim, name="rnn")
h1 = rnn.apply(pre_rnn)
h1_to_o = Linear(name='h1_to_o', input_dim=h_dim, output_dim=o_dim)
pre_softmax = h1_to_o.apply(h1)
softmax = Softmax()
shape = pre_softmax.shape
softmax_out = softmax.apply(pre_softmax.reshape((-1, o_dim)))
softmax_out = softmax_out.reshape(shape)
softmax_out.name = 'softmax_out'

# comparing only last time-step
cost = CategoricalCrossEntropy().apply(y[-1, :, 0], softmax_out[-1])
cost.name = 'CrossEntropy'
error_rate = MisclassificationRate().apply(y[-1, :, 0], softmax_out[-1])
error_rate.name = 'error_rate'
Example #30
def main(mode, save_path, num_batches, data_path=None):
    # Experiment configuration
    dimension = 100
    readout_dimension = len(char2code)

    # Build bricks
    encoder = Bidirectional(SimpleRecurrent(dim=dimension, activation=Tanh()),
                            weights_init=Orthogonal())
    fork = Fork(
        [name for name in encoder.prototype.apply.sequences if name != 'mask'],
        weights_init=IsotropicGaussian(0.1),
        biases_init=Constant(0))
    fork.input_dim = dimension
    fork.output_dims = {name: dimension for name in fork.input_names}
    lookup = LookupTable(readout_dimension,
                         dimension,
                         weights_init=IsotropicGaussian(0.1))
    transition = SimpleRecurrent(activation=Tanh(),
                                 dim=dimension,
                                 name="transition")
    attention = SequenceContentAttention(state_names=transition.apply.states,
                                         sequence_dim=2 * dimension,
                                         match_dim=dimension,
                                         name="attention")
    readout = LinearReadout(readout_dim=readout_dimension,
                            source_names=["states"],
                            emitter=SoftmaxEmitter(name="emitter"),
                            feedbacker=LookupFeedback(readout_dimension,
                                                      dimension),
                            name="readout")
    generator = SequenceGenerator(readout=readout,
                                  transition=transition,
                                  attention=attention,
                                  weights_init=IsotropicGaussian(0.1),
                                  biases_init=Constant(0),
                                  name="generator")
    generator.push_initialization_config()
    transition.weights_init = Orthogonal()

    if mode == "train":
        # Data processing pipeline
        dataset_options = dict(dictionary=char2code,
                               level="character",
                               preprocess=_lower)
        if data_path:
            dataset = TextFile(data_path, **dataset_options)
        else:
            dataset = OneBillionWord("training", [99], **dataset_options)
        data_stream = DataStreamMapping(
            mapping=_transpose,
            data_stream=PaddingDataStream(
                BatchDataStream(
                    iteration_scheme=ConstantScheme(10),
                    data_stream=DataStreamMapping(
                        mapping=reverse_words,
                        add_sources=("targets", ),
                        data_stream=DataStreamFilter(
                            predicate=_filter_long,
                            data_stream=dataset.get_default_stream())))))

        # Build the cost computation graph
        chars = tensor.lmatrix("features")
        chars_mask = tensor.matrix("features_mask")
        targets = tensor.lmatrix("targets")
        targets_mask = tensor.matrix("targets_mask")
        batch_cost = generator.cost(
            targets,
            targets_mask,
            attended=encoder.apply(**dict_union(fork.apply(
                lookup.lookup(chars), return_dict=True),
                                                mask=chars_mask)),
            attended_mask=chars_mask).sum()
        batch_size = named_copy(chars.shape[1], "batch_size")
        cost = aggregation.mean(batch_cost, batch_size)
        cost.name = "sequence_log_likelihood"
        logger.info("Cost graph is built")

        # Give an idea of what's going on
        model = Model(cost)
        params = model.get_params()
        logger.info("Parameters:\n" +
                    pprint.pformat([(key, value.get_value().shape)
                                    for key, value in params.items()],
                                   width=120))

        # Initialize parameters
        for brick in model.get_top_bricks():
            brick.initialize()

        # Fetch variables useful for debugging
        max_length = named_copy(chars.shape[0], "max_length")
        cost_per_character = named_copy(
            aggregation.mean(batch_cost, batch_size * max_length),
            "character_log_likelihood")
        cg = ComputationGraph(cost)
        (energies, ) = VariableFilter(application=readout.readout,
                                      name="output")(cg.variables)
        min_energy = named_copy(energies.min(), "min_energy")
        max_energy = named_copy(energies.max(), "max_energy")
        (activations, ) = VariableFilter(
            application=generator.transition.apply,
            name="states")(cg.variables)
        mean_activation = named_copy(
            abs(activations).mean(), "mean_activation")

        # Define the training algorithm.
        algorithm = GradientDescent(cost=cost,
                                    step_rule=CompositeRule(
                                        [StepClipping(10.0),
                                         Scale(0.01)]))

        # More variables for debugging
        observables = [
            cost, min_energy, max_energy, mean_activation, batch_size,
            max_length, cost_per_character, algorithm.total_step_norm,
            algorithm.total_gradient_norm
        ]
        for name, param in params.items():
            observables.append(named_copy(param.norm(2), name + "_norm"))
            observables.append(
                named_copy(algorithm.gradients[param].norm(2),
                           name + "_grad_norm"))

        # Construct the main loop and start training!
        average_monitoring = TrainingDataMonitoring(observables,
                                                    prefix="average",
                                                    every_n_batches=10)
        main_loop = MainLoop(
            model=model,
            data_stream=data_stream,
            algorithm=algorithm,
            extensions=[
                Timing(),
                TrainingDataMonitoring(observables, after_every_batch=True),
                average_monitoring,
                FinishAfter(after_n_batches=num_batches).add_condition(
                    "after_batch", _is_nan),
                Plot(os.path.basename(save_path),
                     [[average_monitoring.record_name(cost)],
                      [average_monitoring.record_name(cost_per_character)]],
                     every_n_batches=10),
                SerializeMainLoop(save_path,
                                  every_n_batches=500,
                                  save_separately=["model", "log"]),
                Printing(every_n_batches=1)
            ])
        main_loop.run()
    elif mode == "test":
        logger.info("Model is loaded")
        chars = tensor.lmatrix("features")
        generated = generator.generate(
            n_steps=3 * chars.shape[0],
            batch_size=chars.shape[1],
            attended=encoder.apply(**dict_union(
                fork.apply(lookup.lookup(chars), return_dict=True))),
            attended_mask=tensor.ones(chars.shape))
        model = Model(generated)
        model.set_param_values(load_parameter_values(save_path))
        sample_function = model.get_theano_function()
        logging.info("Sampling function is compiled")

        while True:
            # Python 2-3 compatibility
            line = input("Enter a sentence\n")
            batch_size = int(input("Enter a number of samples\n"))
            encoded_input = [
                char2code.get(char, char2code["<UNK>"])
                for char in line.lower().strip()
            ]
            encoded_input = ([char2code['<S>']] + encoded_input +
                             [char2code['</S>']])
            print("Encoder input:", encoded_input)
            target = reverse_words((encoded_input, ))[0]
            print("Target: ", target)
            states, samples, glimpses, weights, costs = sample_function(
                numpy.repeat(numpy.array(encoded_input)[:, None],
                             batch_size,
                             axis=1))

            messages = []
            for i in range(samples.shape[1]):
                sample = list(samples[:, i])
                try:
                    true_length = sample.index(char2code['</S>']) + 1
                except ValueError:
                    true_length = len(sample)
                sample = sample[:true_length]
                cost = costs[:true_length, i].sum()
                message = "({})".format(cost)
                message += "".join(code2char[code] for code in sample)
                if sample == target:
                    message += " CORRECT!"
                messages.append((cost, message))
            messages.sort(key=operator.itemgetter(0), reverse=True)
            for _, message in messages:
                print(message)