Example #1
class FeedbackRNN(BaseRecurrent):
    def __init__(self, dim, **kwargs):
        super(FeedbackRNN, self).__init__(**kwargs)
        self.dim = dim
        self.first_recurrent_layer = SimpleRecurrent(
            dim=self.dim, activation=Identity(), name='first_recurrent_layer',
            weights_init=initialization.Identity())
        self.second_recurrent_layer = SimpleRecurrent(
            dim=self.dim, activation=Identity(), name='second_recurrent_layer',
            weights_init=initialization.Identity())
        self.children = [self.first_recurrent_layer,
                         self.second_recurrent_layer]

    @recurrent(sequences=['inputs'], contexts=[],
               states=['first_states', 'second_states'],
               outputs=['first_states', 'second_states'])
    def apply(self, inputs, first_states=None, second_states=None):
        first_h = self.first_recurrent_layer.apply(
            inputs=inputs, states=first_states + second_states, iterate=False)
        second_h = self.second_recurrent_layer.apply(
            inputs=first_h, states=second_states, iterate=False)
        return first_h, second_h

    def get_dim(self, name):
        return (self.dim if name in ('inputs', 'first_states', 'second_states')
                else super(FeedbackRNN, self).get_dim(name))
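For reference, the Blocks recurrent tutorial drives this brick as follows; the short sketch below assumes numpy, theano, and the definitions above are importable.

import numpy as np
import theano
from theano import tensor

# Build the brick, initialize its (identity) weights, and unroll it over a
# sequence of length 3 with batch size 1 and dimension 3.
x = tensor.tensor3('x')
feedback = FeedbackRNN(dim=3)
feedback.initialize()
first_h, second_h = feedback.apply(inputs=x)
f = theano.function([x], [first_h, second_h])
for states in f(np.ones((3, 1, 3), dtype=theano.config.floatX)):
    print(states)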
Example #2
class CompositionalLayerToyBidirectional(Initializable):
    def __init__(self, batch_size, num_subwords, num_words, subword_embedding_size, input_vocab_size,
                 subword_RNN_hidden_state_size, add_one=True, **kwargs):

        super(CompositionalLayerToyBidirectional, self).__init__(**kwargs)

        self.batch_size = batch_size
        self.num_subwords = num_subwords # number of subwords which make up a word
        self.num_words = num_words  # number of words in the sentence
        self.subword_embedding_size = subword_embedding_size
        self.input_vocab_size = input_vocab_size
        self.subword_RNN_hidden_state_size = subword_RNN_hidden_state_size
        self.add_one = add_one  # adds 1 to the backwards embeddings

        # create the look up table
        self.lookup = LookupTable(length=self.input_vocab_size, dim=self.subword_embedding_size, name='input_lookup')
        self.lookup.weights_init = Uniform(width=0.08)
        self.lookup.biases_init = Constant(0)

        # has one RNN which reads the subwords into a word embedding
        self.compositional_subword_to_word_RNN_forward = SimpleRecurrent(
            dim=self.subword_RNN_hidden_state_size, activation=Identity(), name='subword_RNN_forward',
            weights_init=Identity_init())

        self.compositional_subword_to_word_RNN_backward = SimpleRecurrent(
            dim=self.subword_RNN_hidden_state_size, activation=Identity(), name='subword_RNN_backward',
            weights_init=Identity_init())

        self.children = [self.lookup, self.compositional_subword_to_word_RNN_forward,
                         self.compositional_subword_to_word_RNN_backward]
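Only the constructor is shown above. A plausible apply for such a layer, sketched under the assumption that the forward and backward RNNs each read one word's subwords and their final states are concatenated (with add_one applied to the reversed embeddings), might look like this; it is illustrative, not the project's actual method:

    # Sketch only. subword_ids: (num_subwords, batch_size) int ids;
    # subword_mask: same shape, floatX, 1 where a subword is present.
    def apply(self, subword_ids, subword_mask):
        embeddings = self.lookup.apply(subword_ids)
        reversed_embeddings = embeddings[::-1]
        if self.add_one:
            reversed_embeddings = reversed_embeddings + 1
        forward_states = self.compositional_subword_to_word_RNN_forward.apply(
            inputs=embeddings, mask=subword_mask)
        backward_states = self.compositional_subword_to_word_RNN_backward.apply(
            inputs=reversed_embeddings, mask=subword_mask[::-1])
        # Concatenate the final forward and backward states per word.
        return tensor.concatenate([forward_states[-1], backward_states[-1]],
                                  axis=-1)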
Example #3
class TestBidirectional(unittest.TestCase):
    def setUp(self):
        self.bidir = Bidirectional(weights_init=Orthogonal(),
                                   prototype=SimpleRecurrent(
                                       dim=3, activation=Tanh()))
        self.simple = SimpleRecurrent(dim=3, weights_init=Orthogonal(),
                                      activation=Tanh(), seed=1)
        self.bidir.allocate()
        self.simple.initialize()
        self.bidir.children[0].params[0].set_value(
            self.simple.params[0].get_value())
        self.bidir.children[1].params[0].set_value(
            self.simple.params[0].get_value())
        self.x_val = 0.1 * numpy.asarray(
            list(itertools.permutations(range(4))),
            dtype=floatX)
        self.x_val = (numpy.ones((24, 4, 3), dtype=floatX) *
                      self.x_val[..., None])
        self.mask_val = numpy.ones((24, 4), dtype=floatX)
        self.mask_val[12:24, 3] = 0

    def test(self):
        x = tensor.tensor3('x')
        mask = tensor.matrix('mask')
        calc_bidir = theano.function([x, mask],
                                     [self.bidir.apply(x, mask=mask)])
        calc_simple = theano.function([x, mask],
                                      [self.simple.apply(x, mask=mask)])
        h_bidir = calc_bidir(self.x_val, self.mask_val)[0]
        h_simple = calc_simple(self.x_val, self.mask_val)[0]
        h_simple_rev = calc_simple(self.x_val[::-1], self.mask_val[::-1])[0]

        assert_allclose(h_simple, h_bidir[..., :3], rtol=1e-04)
        assert_allclose(h_simple_rev, h_bidir[::-1, ..., 3:], rtol=1e-04)
Example #4
    def __init__(self, dim, **kwargs):
        super(FeedbackRNN, self).__init__(**kwargs)
        self.dim = dim
        self.first_recurrent_layer = SimpleRecurrent(
            dim=self.dim, activation=Identity(), name='first_recurrent_layer',
            weights_init=initialization.Identity())
        self.second_recurrent_layer = SimpleRecurrent(
            dim=self.dim, activation=Identity(), name='second_recurrent_layer',
            weights_init=initialization.Identity())
        self.children = [self.first_recurrent_layer,
                         self.second_recurrent_layer]
Example #5
def example5():
    """Bidirectional + SimpleRecurrent. Adapted from a unit test in Blocks."""

    bidir = Bidirectional(weights_init=Orthogonal(),
                          prototype=SimpleRecurrent(dim=3, activation=Tanh()))

    simple = SimpleRecurrent(dim=3, weights_init=Orthogonal(),
                             activation=Tanh(), seed=1)

    bidir.allocate()
    simple.initialize()

    bidir.children[0].parameters[0].set_value(
        simple.parameters[0].get_value())
    bidir.children[1].parameters[0].set_value(
        simple.parameters[0].get_value())

    # Initialize theano variables and functions
    x = tensor.tensor3('x')
    mask = tensor.matrix('mask')

    calc_bidir = theano.function([x, mask],
                                 [bidir.apply(x, mask=mask)])
    calc_simple = theano.function([x, mask],
                                  [simple.apply(x, mask=mask)])

    # Testing time
    x_val = 0.1 * np.asarray(
        list(itertools.permutations(range(4))),
        dtype=theano.config.floatX)
    x_val = (np.ones((24, 4, 3), dtype=theano.config.floatX) *
             x_val[..., None])
    mask_val = np.ones((24, 4), dtype=theano.config.floatX)
    mask_val[12:24, 3] = 0

    h_bidir = calc_bidir(x_val, mask_val)[0]
    h_simple = calc_simple(x_val, mask_val)[0]
    h_simple_rev = calc_simple(x_val[::-1], mask_val[::-1])[0]

    print(h_bidir)
    print(h_simple)
    print(h_simple_rev)
Example #6
class MyRnn(BaseRecurrent):  # Extend the base recurrent class to create one of your own
    def __init__(self, dim, **kwargs):
        super(MyRnn, self).__init__(**kwargs)
        self.dim = dim
        self.layer1 = SimpleRecurrent(dim=self.dim, activation=Identity(),
                                      name='recurrent_layer_1',
                                      weights_init=initialization.Identity())
        self.layer2 = SimpleRecurrent(dim=self.dim, activation=Identity(),
                                      name='recurrent_layer_2',
                                      weights_init=initialization.Identity())
        self.children = [self.layer1, self.layer2]

    @recurrent(sequences=['inputs'], contexts=[],
               states=['first_states', 'second_states'],
               outputs=['first_states', 'second_states'])
    def apply(self, inputs, first_states=None, second_states=None):
        first_h = self.layer1.apply(inputs=inputs, states=first_states, iterate=False)
        second_h = self.layer2.apply(inputs=first_h, states=second_states, iterate=False)
        return first_h, second_h

    def get_dim(self, name):
        if name in ('inputs', 'first_states', 'second_states'):
            return self.dim
        return super(MyRnn, self).get_dim(name)
Example #7
class TestSimpleRecurrent(unittest.TestCase):
    def setUp(self):
        self.simple = SimpleRecurrent(dim=3, weights_init=Constant(2),
                                      activation=Tanh())
        self.simple.initialize()

    def test_one_step(self):
        h0 = tensor.matrix('h0')
        x = tensor.matrix('x')
        mask = tensor.vector('mask')
        h1 = self.simple.apply(x, h0, mask=mask, iterate=False)
        next_h = theano.function(inputs=[h0, x, mask], outputs=[h1])

        h0_val = 0.1 * numpy.array([[1, 1, 0], [0, 1, 1]],
                                   dtype=theano.config.floatX)
        x_val = 0.1 * numpy.array([[1, 2, 3], [4, 5, 6]],
                                  dtype=theano.config.floatX)
        mask_val = numpy.array([1, 0]).astype(theano.config.floatX)
        h1_val = numpy.tanh(h0_val.dot(2 * numpy.ones((3, 3))) + x_val)
        h1_val = mask_val[:, None] * h1_val + (1 - mask_val[:, None]) * h0_val
        assert_allclose(h1_val, next_h(h0_val, x_val, mask_val)[0])

    def test_many_steps(self):
        x = tensor.tensor3('x')
        mask = tensor.matrix('mask')
        h = self.simple.apply(x, mask=mask, iterate=True)
        calc_h = theano.function(inputs=[x, mask], outputs=[h])

        x_val = 0.1 * numpy.asarray(list(itertools.permutations(range(4))),
                                    dtype=theano.config.floatX)
        x_val = numpy.ones((24, 4, 3),
                           dtype=theano.config.floatX) * x_val[..., None]
        mask_val = numpy.ones((24, 4), dtype=theano.config.floatX)
        mask_val[12:24, 3] = 0
        h_val = numpy.zeros((25, 4, 3), dtype=theano.config.floatX)
        for i in range(1, 25):
            h_val[i] = numpy.tanh(h_val[i - 1].dot(
                2 * numpy.ones((3, 3))) + x_val[i - 1])
            h_val[i] = (mask_val[i - 1, :, None] * h_val[i] +
                        (1 - mask_val[i - 1, :, None]) * h_val[i - 1])
        h_val = h_val[1:]
        assert_allclose(h_val, calc_h(x_val, mask_val)[0], rtol=1e-04)

        # Also test that initial state is a parameter
        initial_state, = VariableFilter(roles=[INITIAL_STATE])(
            ComputationGraph(h))
        assert is_shared_variable(initial_state)
        assert initial_state.name == 'initial_state'
Example #8
class TextRNN(object):
    def __init__(self, dim_in, dim_hidden, dim_out, **kwargs):

        self.dim_in = dim_in
        self.dim_hidden = dim_hidden
        self.dim_out = dim_out

        self.input_layer = Linear(input_dim=self.dim_in, output_dim=self.dim_hidden,
                                weights_init=initialization.IsotropicGaussian(),
                                biases_init=initialization.Constant(0))
        self.input_layer.initialize()

        sparse_init = initialization.Sparse(num_init=15, weights_init=initialization.IsotropicGaussian())
        self.recurrent_layer = SimpleRecurrent(
                                dim=self.dim_hidden, activation=Tanh(), name="first_recurrent_layer",
                                weights_init=sparse_init,
                                biases_init=initialization.Constant(0.01))
        '''
        self.recurrent_layer = LSTM(dim=self.dim_hidden, activation=Tanh(),
                                    weights_init=initialization.IsotropicGaussian(std=0.001),
                                    biases_init=initialization.Constant(0.01))
        '''
        self.recurrent_layer.initialize()

        self.output_layer = Linear(input_dim=self.dim_hidden, output_dim=self.dim_out,
                                weights_init=initialization.Uniform(width=0.01),
                                biases_init=initialization.Constant(0.01))
        self.output_layer.initialize()

        self.children = [self.input_layer, self.recurrent_layer, self.output_layer]
Example #9
class CompositionalLayerToyWithTables(Initializable):
    def __init__(self, batch_size, num_subwords, num_words, subword_embedding_size, input_vocab_size,
                 subword_RNN_hidden_state_size, **kwargs):

        super(CompositionalLayerToyWithTables, self).__init__(**kwargs)

        self.batch_size = batch_size
        self.num_subwords = num_subwords # number of subwords which make up a word
        self.num_words = num_words  # number of words in the sentence
        self.subword_embedding_size = subword_embedding_size
        self.input_vocab_size = input_vocab_size
        self.subword_RNN_hidden_state_size = subword_RNN_hidden_state_size

        # create the look up table
        self.lookup = LookupTable(length=self.input_vocab_size, dim=self.subword_embedding_size, name='input_lookup')
        self.lookup.weights_init = Uniform(width=0.08)
        self.lookup.biases_init = Constant(0)

        # has one RNN which reads the subwords into a word embedding
        self.compositional_subword_to_word_RNN = SimpleRecurrent(
            dim=self.subword_RNN_hidden_state_size, activation=Identity(), name='subword_RNN',
            weights_init=Identity_init())

        self.children = [self.lookup, self.compositional_subword_to_word_RNN]


    '''
    subword_id_input_ is a 3d tensor with the dimensions of shape = (num_words, num_subwords, batch_size).
    It is expected as a dtype=uint16 or equivalent

    subword_id_input_mask_ is a 3d tensor with the dimensions of shape = (num_words, num_subwords, batch_size).
    It is expected as a dtype=uint8 or equivalent and has binary values of 1 when there is data and zero otherwise.

    The look up table will return a 4d tensor with shape = (num_words, num_subwords, batch_size, embedding size)

    The RNN will eat up the subwords dimension, resulting in a
    3d tensor of shape = (num_words, batch_size, RNN_hidden_value_size), which is returned as 'word_embeddings'

    Also returned is a 2d tensor of shape = (num_words, batch_size), which is the remaining mask indicating
    the length of the sentence for each sentence in the batch, i.e., 1 when there is a word, 0 otherwise.
    '''
    @application(inputs=['subword_id_input_', 'subword_id_input_mask_'], outputs=['word_embeddings', 'word_embeddings_mask'])
    def apply(self, subword_id_input_, subword_id_input_mask_):
        ##shape = (num_words, num_subwords, batch_size, embedding size)
        subword_embeddings = self.lookup.apply(subword_id_input_)

        result, updates = theano.scan( #loop over each word and have the rnn eat up the subwords
            fn=lambda subword_embeddings, subword_id_input_mask_: self.compositional_subword_to_word_RNN.apply(subword_embeddings, mask=subword_id_input_mask_),
            sequences= [subword_embeddings, subword_id_input_mask_])

        word_embeddings = result.dimshuffle(1, 0, 2, 3)  # move the subword (RNN step) axis to the front
        # remove the next line to see all the intermediate RNN states
        word_embeddings = word_embeddings[-1]  # keep only the last state, since we don't need the others

        # remove the subword dim from the mask:
        # if all of a word's subwords are empty then the word is empty; otherwise the word is used
        word_embeddings_mask = subword_id_input_mask_.max(axis=1)

        return word_embeddings, word_embeddings_mask
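The docstring above pins down the expected shapes; a minimal shape-check sketch (toy sizes, illustrative variable names, int32 ids in place of uint16) could look like:

import numpy as np
import theano
from theano import tensor

subword_ids = tensor.tensor3('subword_ids', dtype='int32')
subword_mask = tensor.tensor3('subword_mask', dtype=theano.config.floatX)

layer = CompositionalLayerToyWithTables(
    batch_size=2, num_subwords=4, num_words=5, subword_embedding_size=3,
    input_vocab_size=10, subword_RNN_hidden_state_size=3)
layer.initialize()
words, words_mask = layer.apply(subword_ids, subword_mask)
f = theano.function([subword_ids, subword_mask], [words, words_mask])

ids = np.random.randint(0, 10, size=(5, 4, 2)).astype('int32')
mask = np.ones((5, 4, 2), dtype=theano.config.floatX)
out, out_mask = f(ids, mask)
print(out.shape)       # expected: (num_words, batch_size, hidden_state_size)
print(out_mask.shape)  # expected: (num_words, batch_size)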
Example #10
def test_saved_inner_graph():
    """Make sure that the original inner graph is saved."""
    x = tensor.tensor3()
    recurrent = SimpleRecurrent(dim=3, activation=Tanh())
    y = recurrent.apply(x)

    application_call = get_application_call(y)
    assert application_call.inner_inputs
    assert application_call.inner_outputs

    cg = ComputationGraph(application_call.inner_outputs)
    # Check that the inner scan graph is annotated
    # with `recurrent.apply`
    assert len(VariableFilter(application=recurrent.apply)(cg)) == 3
    # Check that the inner graph is equivalent to the one
    # produced by a stand-alone call to `recurrent.apply`
    assert is_same_graph(application_call.inner_outputs[0],
                         recurrent.apply(*application_call.inner_inputs,
                                         iterate=False))
Example #11
class TextRNN(object):

    def __init__(self, dim_in, dim_hidden, dim_out, **kwargs):

        self.dim_in = dim_in
        self.dim_hidden = dim_hidden
        self.dim_out = dim_out

        self.input_layer = Linear(input_dim=self.dim_in, output_dim=self.dim_hidden,
                                weights_init=initialization.IsotropicGaussian(),
                                biases_init=initialization.Constant(0))
        self.input_layer.initialize()

        sparse_init = initialization.Sparse(num_init=15, weights_init=initialization.IsotropicGaussian())
        self.recurrent_layer = SimpleRecurrent(
                                dim=self.dim_hidden, activation=Tanh(), name="first_recurrent_layer",
                                weights_init=sparse_init,
                                biases_init=initialization.Constant(0.01))
        '''
        self.recurrent_layer = LSTM(dim=self.dim_hidden, activation=Tanh(),
                                    weights_init=initialization.IsotropicGaussian(std=0.001),
                                    biases_init=initialization.Constant(0.01))
        '''
        self.recurrent_layer.initialize()

        self.output_layer = Linear(input_dim=self.dim_hidden, output_dim=self.dim_out,
                                weights_init=initialization.Uniform(width=0.01),
                                biases_init=initialization.Constant(0.01))
        self.output_layer.initialize()

        self.children = [self.input_layer, self.recurrent_layer, self.output_layer]

    '''
    @recurrent(sequences=['inputs'], 
            states=['states'],
            contexts=[],
            outputs=['states', 'output'])
    '''

    def run(self, inputs):
        output = self.output_layer.apply(
            self.recurrent_layer.apply(self.input_layer.apply(inputs)))
        return output
Example #12
class LanguageModelToy(Initializable):
    """
    This takes the word embeddings from CompositionalLayerToyWithTables and creates sentence embeddings

    Input is a 3d tensor with the dimensions of (num_words, num_subwords, batch_size) and
    a 3d tensor a mask of size (num_words, num_subwords, batch_size)

    All hidden state sizes are the same as the subword embedding size

    This returns a 3d tensor with dimensions of (num_words = num RNN states, batch_size, sentence embedding size)
    """

    def __init__(self, batch_size, num_subwords, num_words, subword_embedding_size, input_vocab_size,
                 subword_RNN_hidden_state_size, LM_RNN_hidden_state_size, **kwargs):

        super(LanguageModelToy, self).__init__(**kwargs)
        self.batch_size = batch_size
        self.num_subwords = num_subwords # number of subwords which make up a word
        self.num_words = num_words  # number of words in the sentence
        self.subword_embedding_size = subword_embedding_size
        self.input_vocab_size = input_vocab_size
        self.subword_RNN_hidden_state_size = subword_RNN_hidden_state_size
        self.LM_RNN_hidden_state_size = LM_RNN_hidden_state_size

        self.compositional_layer = CompositionalLayerToyWithTables(self.batch_size, self.num_subwords, self.num_words,
                                                              self.subword_embedding_size, self.input_vocab_size,
                                                              self.subword_RNN_hidden_state_size, name='compositional_layer')

        # has one RNN which reads the word embeddings into a sentence embedding
        self.language_model_RNN = SimpleRecurrent(
            dim=self.LM_RNN_hidden_state_size, activation=Identity(), name='language_model_RNN',
            weights_init=Identity_init())

        self.children = [self.compositional_layer, self.language_model_RNN]

    @application(inputs=['subword_id_input_', 'subword_id_input_mask_'], outputs=['sentence_embeddings', 'sentence_embeddings_mask'])
    def apply(self, subword_id_input_, subword_id_input_mask_):
        """
        subword_id_input_ is a 3d tensor with the dimensions of shape = (num_words, num_subwords, batch_size).
        It is expected as a dtype=uint16 or equivalent

        subword_id_input_mask_ is a 3d tensor with the dimensions of shape = (num_words, num_subwords, batch_size).
        It is expected as a dtype=uint8 or equivalent and has binary values of 1 when there is data and zero otherwise.

        Returned is a 3d tensor of size (num_words = num RNN states, batch_size, sentence embedding size)
        Also returned is a 1d tensor of size (batch_size) describing whether the sentence is valid or empty in the batch
        """

        word_embeddings, word_embeddings_mask = self.compositional_layer.apply(subword_id_input_, subword_id_input_mask_)
        sentence_embeddings = self.language_model_RNN.apply(word_embeddings, mask=word_embeddings_mask)

        sentence_embeddings_mask = word_embeddings_mask.max(axis=0).T

        return sentence_embeddings, sentence_embeddings_mask
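Wiring the whole toy model together follows the same pattern as the sketch after Example #9; the sizes and symbolic inputs below are again illustrative assumptions.

import theano
from theano import tensor

subword_ids = tensor.tensor3('subword_ids', dtype='int32')
subword_mask = tensor.tensor3('subword_mask', dtype=theano.config.floatX)

lm = LanguageModelToy(batch_size=2, num_subwords=4, num_words=5,
                      subword_embedding_size=3, input_vocab_size=10,
                      subword_RNN_hidden_state_size=3,
                      LM_RNN_hidden_state_size=3)
lm.initialize()
sentence_embeddings, sentence_mask = lm.apply(subword_ids, subword_mask)
f = theano.function([subword_ids, subword_mask],
                    [sentence_embeddings, sentence_mask])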
Example #13
def example():
    """ Simple reccurent example. Taken from : https://github.com/mdda/pycon.sg-2015_deep-learning/blob/master/ipynb/blocks-recurrent-docs.ipynb """
    x = tensor.tensor3('x')

    rnn = SimpleRecurrent(dim=3, activation=Identity(), weights_init=initialization.Identity())
    rnn.initialize()
    h = rnn.apply(x)

    f = theano.function([x], h)
    print(f(np.ones((3, 1, 3), dtype=theano.config.floatX))) 

    doubler = Linear(
                 input_dim=3, output_dim=3, weights_init=initialization.Identity(2),
                 biases_init=initialization.Constant(0))
    doubler.initialize()
    h_doubler = rnn.apply(doubler.apply(x))

    f = theano.function([x], h_doubler)
    print(f(np.ones((3, 1, 3), dtype=theano.config.floatX))) 

    # Initial state
    h0 = tensor.matrix('h0')
    h = rnn.apply(inputs=x, states=h0)

    f = theano.function([x, h0], h)
    print(f(np.ones((3, 1, 3), dtype=theano.config.floatX),
            np.ones((1, 3), dtype=theano.config.floatX))) 
Example #14
def test_saved_inner_graph():
    """Make sure that the original inner graph is saved."""
    x = tensor.tensor3()
    recurrent = SimpleRecurrent(dim=3, activation=Tanh())
    y = recurrent.apply(x)

    application_call = get_application_call(y)
    assert application_call.inner_inputs
    assert application_call.inner_outputs

    cg = ComputationGraph(application_call.inner_outputs)
    # Check that the inner scan graph is annotated
    # with `recurrent.apply`
    assert len(VariableFilter(applications=[recurrent.apply])(cg)) == 3
    # Check that the inner graph is equivalent to the one
    # produced by a stand-alone call to `recurrent.apply`
    assert is_same_graph(application_call.inner_outputs[0],
                         recurrent.apply(*application_call.inner_inputs,
                                         iterate=False))
Example #15
    def setUp(self):
        self.bidir = Bidirectional(weights_init=Orthogonal(),
                                   prototype=SimpleRecurrent(
                                       dim=3, activation=Tanh()))
        self.simple = SimpleRecurrent(dim=3, weights_init=Orthogonal(),
                                      activation=Tanh(), seed=1)
        self.bidir.allocate()
        self.simple.initialize()
        self.bidir.children[0].parameters[0].set_value(
            self.simple.parameters[0].get_value())
        self.bidir.children[1].parameters[0].set_value(
            self.simple.parameters[0].get_value())
        self.x_val = 0.1 * numpy.asarray(
            list(itertools.permutations(range(4))),
            dtype=theano.config.floatX)
        self.x_val = (numpy.ones((24, 4, 3), dtype=theano.config.floatX) *
                      self.x_val[..., None])
        self.mask_val = numpy.ones((24, 4), dtype=theano.config.floatX)
        self.mask_val[12:24, 3] = 0
Example #16
    def __init__(self, batch_size, num_subwords, num_words, subword_embedding_size, input_vocab_size,
                 subword_RNN_hidden_state_size, LM_RNN_hidden_state_size, **kwargs):

        super(LanguageModelToy, self).__init__(**kwargs)
        self.batch_size = batch_size
        self.num_subwords = num_subwords # number of subwords which make up a word
        self.num_words = num_words  # number of words in the sentence
        self.subword_embedding_size = subword_embedding_size
        self.input_vocab_size = input_vocab_size
        self.subword_RNN_hidden_state_size = subword_RNN_hidden_state_size
        self.LM_RNN_hidden_state_size = LM_RNN_hidden_state_size

        self.compositional_layer = CompositionalLayerToyWithTables(self.batch_size, self.num_subwords, self.num_words,
                                                              self.subword_embedding_size, self.input_vocab_size,
                                                              self.subword_RNN_hidden_state_size, name='compositional_layer')

        # has one RNN which reads the word embeddings into a sentence embedding
        self.language_model_RNN = SimpleRecurrent(
            dim=self.LM_RNN_hidden_state_size, activation=Identity(), name='language_model_RNN',
            weights_init=Identity_init())

        self.children = [self.compositional_layer, self.language_model_RNN]
Example #17
    def __init__(
        self,
        input_dim,
        state_dim,
        activation=Tanh(),
        state_weights_init=None,
        input_weights_init=None,
        biases_init=None,
        **kwargs
    ):
        # Resolve defaults before handing biases_init to the parent class.
        if state_weights_init is None:
            state_weights_init = init.IsotropicGaussian(0.01)
        if input_weights_init is None:
            input_weights_init = init.IsotropicGaussian(0.01)
        if biases_init is None:
            biases_init = init.Constant(0)
        super(SimpleRecurrentLayer, self).__init__(biases_init=biases_init, **kwargs)

        self.input_transformation = Linear(
            input_dim=input_dim, output_dim=state_dim, weights_init=input_weights_init, biases_init=biases_init
        )
        self.rnn = SimpleRecurrent(dim=state_dim, activation=activation, weights_init=state_weights_init)
        self.children = [self.input_transformation, self.rnn]
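The fragment stops at the constructor; the apply such a wrapper typically exposes just chains its two children. A minimal sketch, assuming Blocks' application decorator is in scope:

    @application(inputs=['inputs'], outputs=['states'])
    def apply(self, inputs):
        # Project the input to the state dimension, then run the RNN over it.
        return self.rnn.apply(inputs=self.input_transformation.apply(inputs))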
Example #18
    def __init__(self, dimension, alphabet_size, **kwargs):
        super(SimpleGenerator, self).__init__(**kwargs)
        lookup = LookupTable(alphabet_size, dimension)
        transition = SimpleRecurrent(
            activation=Tanh(),
            dim=dimension, name="transition")
        attention = SequenceContentAttention(
            state_names=transition.apply.states,
            attended_dim=dimension, match_dim=dimension, name="attention")
        readout = Readout(
            readout_dim=alphabet_size,
            source_names=[transition.apply.states[0],
                          attention.take_glimpses.outputs[0]],
            emitter=SoftmaxEmitter(name="emitter"),
            feedback_brick=LookupFeedback(alphabet_size, dimension),
            name="readout")
        generator = SequenceGenerator(
            readout=readout, transition=transition, attention=attention,
            name="generator")

        self.lookup = lookup
        self.generator = generator
        self.children = [lookup, generator]
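This brick is usually completed with a generate method that drives the underlying SequenceGenerator from the looked-up character sequence; a sketch consistent with how generation is invoked in Example #25 (the step budget 3 * chars.shape[0] is an assumption):

    @application
    def generate(self, chars):
        return self.generator.generate(
            n_steps=3 * chars.shape[0], batch_size=chars.shape[1],
            attended=self.lookup.apply(chars),
            attended_mask=tensor.ones(chars.shape))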
Example #19
    def __init__(self, dims=(88, 100, 100), **kwargs):
        super(Rnn, self).__init__(**kwargs)
        self.dims = dims

        self.input_transform = Linear(input_dim=dims[0], output_dim=dims[1],
                                      weights_init=IsotropicGaussian(0.01),
                                      # biases_init=Constant(0.0),
                                      use_bias=False,
                                      name="input_transfrom")

        self.gru_layer = SimpleRecurrent(dim=dims[1], activation=Tanh(),
                                         weights_init=IsotropicGaussian(0.01),
                                         biases_init=Constant(0.0),
                                         use_bias=True,
                                         name="gru_rnn_layer")

        # TODO: find a way to automatically set the output dim in case of lstm vs normal rnn
        self.linear_trans = Linear(input_dim=dims[1], output_dim=dims[2] * 4,
                                   weights_init=IsotropicGaussian(0.01),
                                   biases_init=Constant(0.0),
                                   use_bias=False,
                                   name="h2h_transform")

        self.lstm_layer = LSTM(dim=dims[2], activation=Tanh(),
                               weights_init=IsotropicGaussian(0.01),
                               biases_init=Constant(0.0),
                               use_bias=True,
                               name="lstm_rnn_layer")

        self.out_transform = MLP(activations=[Sigmoid()], dims=[dims[2], dims[0]],
                                 weights_init=IsotropicGaussian(0.01),
                                 use_bias=True,
                                 biases_init=Constant(0.0),
                                 name="out_layer")

        self.children = [self.input_transform, self.gru_layer, self.linear_trans,
                         self.lstm_layer, self.out_transform]
Example #20
def example5():
    """Bidirectional + SimpleRecurrent. Adapted from a unit test in Blocks."""

    bidir = Bidirectional(weights_init=Orthogonal(),
                          prototype=SimpleRecurrent(dim=3, activation=Tanh()))

    simple = SimpleRecurrent(dim=3,
                             weights_init=Orthogonal(),
                             activation=Tanh(),
                             seed=1)

    bidir.allocate()
    simple.initialize()

    bidir.children[0].parameters[0].set_value(simple.parameters[0].get_value())

    bidir.children[1].parameters[0].set_value(simple.parameters[0].get_value())

    # Initialize theano variables and functions
    x = tensor.tensor3('x')
    mask = tensor.matrix('mask')

    calc_bidir = theano.function([x, mask], [bidir.apply(x, mask=mask)])
    calc_simple = theano.function([x, mask], [simple.apply(x, mask=mask)])

    # Testing time

    x_val = 0.1 * np.asarray(list(itertools.permutations(range(4))),
                             dtype=theano.config.floatX)

    x_val = (np.ones(
        (24, 4, 3), dtype=theano.config.floatX) * x_val[..., None])

    mask_val = np.ones((24, 4), dtype=theano.config.floatX)
    mask_val[12:24, 3] = 0

    h_bidir = calc_bidir(x_val, mask_val)[0]
    h_simple = calc_simple(x_val, mask_val)[0]
    h_simple_rev = calc_simple(x_val[::-1], mask_val[::-1])[0]

    print(h_bidir)
    print(h_simple)
    print(h_simple_rev)
Example #21

def sgd(cost, params):
    grads = T.grad(cost=cost, wrt=params)
    updates = []
    for p, g in zip(params, grads):
        updates.append([p, p - g * learning_rate])
    return updates


# Computational Graph
input = T.tensor3('input')
mask = T.fmatrix('mask')
target = T.tensor3('target')
linear1 = Linear(name='linear1', input_dim=300, output_dim=128)
recurrent = SimpleRecurrent(name='recurrent', activation=Tanh(), dim=128)
linear2 = Linear(name='linear2', input_dim=128, output_dim=9)
softmax = Softmax()
bricks = [linear1, recurrent, linear2]
for brick in bricks:
    brick.weights_init = IsotropicGaussian(0.01)
    brick.biases_init = Constant(0)
    brick.initialize()

linear1_output = linear1.apply(input)
recurrent_output = recurrent.apply(linear1_output, mask=mask)
linear2_output = linear2.apply(recurrent_output)
shape = linear2_output.shape  # e.g. 100 * 29 * 9
output = softmax.apply(linear2_output.reshape(
    (-1,
     9))).reshape(shape)  # softmax over the last dimension (size 9), keeping all other dimensions
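One way to close the training loop for the graph above is sketched below; the learning_rate value (left free in sgd) and the one-hot encoding of target are assumptions.

import theano
from blocks.graph import ComputationGraph

learning_rate = 0.01
cost = T.nnet.categorical_crossentropy(output.reshape((-1, 9)),
                                       target.reshape((-1, 9))).mean()
params = ComputationGraph(cost).parameters
train = theano.function([input, mask, target], cost,
                        updates=sgd(cost, params))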
Example #22
class Rnn(Initializable, BaseRecurrent):
    def __init__(self, dims=(88, 100, 100), **kwargs):
        super(Rnn, self).__init__(**kwargs)
        self.dims = dims

        self.input_transform = Linear(input_dim=dims[0], output_dim=dims[1],
                                      weights_init=IsotropicGaussian(0.01),
                                      # biases_init=Constant(0.0),
                                      use_bias=False,
                                      name="input_transfrom")

        self.gru_layer = SimpleRecurrent(dim=dims[1], activation=Tanh(),
                                         weights_init=IsotropicGaussian(0.01),
                                         biases_init=Constant(0.0),
                                         use_bias=True,
                                         name="gru_rnn_layer")

        # TODO: find a way to automatically set the output dim in case of lstm vs normal rnn
        self.linear_trans = Linear(input_dim=dims[1], output_dim=dims[2] * 4,
                                   weights_init=IsotropicGaussian(0.01),
                                   biases_init=Constant(0.0),
                                   use_bias=False,
                                   name="h2h_transform")

        self.lstm_layer = LSTM(dim=dims[2], activation=Tanh(),
                               weights_init=IsotropicGaussian(0.01),
                               biases_init=Constant(0.0),
                               use_bias=True,
                               name="lstm_rnn_layer")

        self.out_transform = MLP(activations=[Sigmoid()], dims=[dims[2], dims[0]],
                                 weights_init=IsotropicGaussian(0.01),
                                 use_bias=True,
                                 biases_init=Constant(0.0),
                                 name="out_layer")

        self.children = [self.input_transform, self.gru_layer, self.linear_trans,
                         self.lstm_layer, self.out_transform]

    # @recurrent(sequences=['inputs', 'input_mask'], contexts=[],
    # states=['gru_state', 'lstm_state', 'lstm_cells'],
    # outputs=['gru_state', 'lstm_state', 'lstm_cells'])
    def rnn_apply(self, inputs, mask=None, gru_state=None, lstm_state=None, lstm_cells=None):
        input_transform = self.input_transform.apply(inputs)
        gru_state = self.gru_layer.apply(
            inputs=input_transform,
            # update_inputs=input_transform,
            # reset_inputs=input_transform,
            states=gru_state,
            mask=mask,
            iterate=False)
        lstm_transform = self.linear_trans.apply(gru_state)
        lstm_state, lstm_cells = self.lstm_layer.apply(inputs=lstm_transform, states=lstm_state,
                                                       cells=lstm_cells,
                                                       mask=mask, iterate=False)
        return gru_state, lstm_state, lstm_cells

    @recurrent(sequences=[], contexts=[],
               states=['inputs', 'gru_state', 'lstm_state', 'lstm_cells'],
               outputs=['inputs', 'gru_state', 'lstm_state', 'lstm_cells'])
    def rnn_generate(self, inputs=None, gru_state=None, lstm_state=None, lstm_cells=None):
        output, gru_state, lstm_state, lstm_cells = self.apply(
            inputs=inputs,
            gru_state=gru_state,
            lstm_state=lstm_state,
            lstm_cells=lstm_cells,
            iterate=False)
        # Feed the generated output back in as the next step's input.
        return output, gru_state, lstm_state, lstm_cells


    @recurrent(sequences=['inputs', 'mask'], contexts=[],
               states=['gru_state', 'lstm_state', 'lstm_cells'],
               outputs=['output', 'gru_state', 'lstm_state', 'lstm_cells'])
    def apply(self, inputs, mask, gru_state=None, lstm_state=None, lstm_cells=None):
        # input_transform = self.input_transform.apply(inputs)
        # gru_state = self.gru_layer.apply(
        # inputs=input_transform,
        #     mask=mask,
        #     states=gru_state,
        #     iterate=False)
        # lstm_transform = self.linear_trans.apply(gru_state)
        # lstm_state, lstm_cells = self.lstm_layer.apply(inputs=lstm_transform, states=lstm_state,
        #                                                cells=lstm_cells,
        #                                                mask=mask, iterate=False)
        gru_state, lstm_state, lstm_cells = self.rnn_apply(inputs=inputs,
                                                           mask=mask,
                                                           gru_state=gru_state,
                                                           lstm_state=lstm_state,
                                                           lstm_cells=lstm_cells)

        output = 1.17 * self.out_transform.apply(lstm_state) * mask[:, None]
        return output, gru_state, lstm_state, lstm_cells


    def get_dim(self, name):
        dims = dict(zip(['outputs', 'gru_state', 'lstm_state'], self.dims))
        dims['lstm_cells'] = dims['lstm_state']
        return dims.get(name, None) or super(Rnn, self).get_dim(name)
Example #23
def construct_model(vocab_size, embedding_dim, hidden_dim, activation):

    # Construct the model
    x = tensor.lmatrix('features')
    x_mask = tensor.fmatrix('features_mask')
    y = tensor.lmatrix('targets')
    # Batch X Time
    y_mask = tensor.fmatrix('targets_mask')
    # Batch X Time
    frequency_mask = tensor.fmatrix('frequency_mask')
    frequency_mask_mask = tensor.fmatrix('frequency_mask_mask')

    # Only for the validation
    last_word = tensor.lvector('last_word')

    lookup = LookupTable(length=vocab_size, dim=embedding_dim, name='lookup')

    linear = Linear(input_dim=embedding_dim,
                    output_dim=hidden_dim,
                    name="linear")
    hidden = SimpleRecurrent(dim=hidden_dim,
                             activation=activation,
                             name='hidden_recurrent')
    top_linear = Linear(input_dim=hidden_dim,
                        output_dim=vocab_size,
                        name="top_linear")

    # Return 3D Tensor: Batch X Time X embedding_dim
    embeddings = lookup.apply(x)
    # Give time as the first index: Time X Batch X embedding_dim
    embeddings = embeddings.dimshuffle(1, 0, 2)

    pre_recurrent = linear.apply(embeddings)

    after_recurrent = hidden.apply(inputs=pre_recurrent, mask=x_mask.T)[:-1]
    after_recurrent_last = after_recurrent[-1]

    presoft = top_linear.apply(after_recurrent)

    # Define the cost
    # Give y as a vector and reshape presoft to 2D tensor
    y = y.flatten()

    shape = presoft.shape
    presoft = presoft.dimshuffle(1, 0, 2)
    presoft = presoft.reshape((shape[0] * shape[1], shape[2]))

    # Build cost_matrix
    presoft = presoft - presoft.max(axis=1).dimshuffle(0, 'x')
    log_prob = presoft - \
        tensor.log(tensor.exp(presoft).sum(axis=1).dimshuffle(0, 'x'))
    flat_log_prob = log_prob.flatten()
    range_ = tensor.arange(y.shape[0])
    flat_indices = y + range_ * presoft.shape[1]
    cost_matrix = flat_log_prob[flat_indices]

    # Mask useless values from the cost_matrix
    cost_matrix = - cost_matrix * \
        y_mask.flatten() * frequency_mask.flatten() * \
        frequency_mask_mask.flatten()

    # Average the cost
    cost = cost_matrix.sum()
    cost = cost / (y_mask * frequency_mask).sum()

    # Initialize parameters
    for brick in (lookup, linear, hidden, top_linear):
        brick.weights_init = IsotropicGaussian(0.01)
        brick.biases_init = Constant(0.)
        brick.initialize()

    return cost
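Because the theano inputs are declared inside construct_model, the returned cost can be compiled through its computation graph; a sketch with arbitrary sizes:

import theano
from blocks.bricks import Tanh
from blocks.graph import ComputationGraph

cost = construct_model(vocab_size=10000, embedding_dim=100,
                       hidden_dim=200, activation=Tanh())
cg = ComputationGraph(cost)
# cg.inputs recovers 'features', 'targets', and the masks declared above.
f = theano.function(cg.inputs, cost)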
Example #24
    def __init__(self, rnn_dims, num_actions, data_X_np=None, data_y_np=None, width=32, height=32):
        ###############################################################
        #
        #       Network and data setup
        #
        ##############################################################
        RNN_DIMS = 100
        NUM_ACTIONS = num_actions

        tensor5 = T.TensorType('float32', [False, True, True, True, True])
        self.x = T.tensor4('features')
        self.reward = T.tensor3('targets', dtype='float32')
        self.state = T.matrix('states', dtype='float32')

        self.hidden_states = [] # holds hidden states in np array form

        
        # data_X & data_y are supplied to __init__ now
        if data_X_np is None or data_y_np is None:
            print('you did not supply data at init')
            data_X_np = np.float32(np.random.normal(size=(1280, 1, 1, width, height)))
            data_y_np = np.float32(np.random.normal(size=(1280, 1, 1, 1)))
        #data_states_np = np.float32(np.ones((1280, 1, 100)))
        state_shape = (data_X_np.shape[0],rnn_dims)
        self.data_states_np = np.float32(np.zeros(state_shape))


        self.datastream = IterableDataset(dict(features=data_X_np,
                                            targets=data_y_np,
                                            states=self.data_states_np)).get_example_stream()
        self.datastream_test = IterableDataset(dict(features=data_X_np,
                                            targets=data_y_np,
                                            states=self.data_states_np)).get_example_stream()
        data_X = self.datastream


        # 2 conv inputs
        # we want to take our sequence of input images and convert them to convolutional
        # representations
        conv_layers = [ConvolutionalLayer(Rectifier().apply, (3, 3), 16, (2, 2), name='l1'),
                       ConvolutionalLayer(Rectifier().apply, (3, 3), 32, (2, 2), name='l2'),
                       ConvolutionalLayer(Rectifier().apply, (3, 3), 64, (2, 2), name='l3'),
                       ConvolutionalLayer(Rectifier().apply, (3, 3), 128, (2, 2), name='l4'),
                       ConvolutionalLayer(Rectifier().apply, (3, 3), 128, (2, 2), name='l5'),
                       ConvolutionalLayer(Rectifier().apply, (3, 3), 128, (2, 2), name='l6')]
        convnet = ConvolutionalSequence(conv_layers, num_channels=4,
                                        image_size=(width, height),
                                        weights_init=init.Uniform(0, 0.01),
                                        biases_init=init.Constant(0.0),
                                        tied_biases=False,
                                        border_mode='full')
        convnet.initialize()
        output_dim = np.prod(convnet.get_dim('output'))

        conv_out = convnet.apply(self.x)

        reshape_dims = (conv_out.shape[0], conv_out.shape[1]*conv_out.shape[2]*conv_out.shape[3])
        hidden_repr = conv_out.reshape(reshape_dims)
        conv2rnn = Linear(input_dim=output_dim, output_dim=RNN_DIMS, 
                            weights_init=init.Uniform(width=0.01),
                            biases_init=init.Constant(0.))
        conv2rnn.initialize()
        conv2rnn_output = conv2rnn.apply(hidden_repr)

        # RNN hidden layer
        # then we want to feed those conv representations into an RNN
        rnn = SimpleRecurrent(dim=RNN_DIMS, activation=Rectifier(), weights_init=init.Uniform(width=0.01))
        rnn.initialize()
        self.learned_state = rnn.apply(inputs=conv2rnn_output, states=self.state, iterate=False)


        # linear output from hidden layer
        # the RNN has two outputs, but only this one has a target. That is, this is "expected return"
        # which the network attempts to minimize difference between expected return and actual return
        lin_output = Linear(input_dim=RNN_DIMS, output_dim=1, 
                            weights_init=init.Uniform(width=0.01),
                            biases_init=init.Constant(0.))
        lin_output.initialize()
        self.exp_reward = lin_output.apply(self.learned_state)
        self.get_exp_reward = theano.function([self.x, self.state], self.exp_reward)

        # softmax output from hidden layer
        # this provides a softmax of action recommendations
        # the hypothesis is that adjusting the other outputs magically influences this set of outputs
        # to suggest smarter (or more realistic?) moves
        action_output = Linear(input_dim=RNN_DIMS, output_dim=NUM_ACTIONS, 
                            weights_init=init.Constant(.001), 
                            biases_init=init.Constant(0.))
        action_output.initialize()

        self.suggested_actions = Softmax().apply(action_output.apply(self.learned_state[-1]))

        ######################
        # use this to get suggested actions... it requires the state of the hidden units from the previous
        # timestep
        #####################
        self.get_suggested_actions = theano.function([self.x, self.state], [self.suggested_actions, self.learned_state])
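The two compiled functions at the end are meant to be called step by step, carrying the hidden state forward between calls; a minimal driving sketch (the shapes and the agent instance name are assumptions based on the constructor above):

import numpy as np

frame = np.zeros((1, 4, 32, 32), dtype='float32')  # (batch, channels, width, height)
state = np.zeros((1, 100), dtype='float32')        # previous RNN hidden state
actions, state = agent.get_suggested_actions(frame, state)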
Example #25
def main(mode, save_path, num_batches, data_path=None):
    # Experiment configuration
    dimension = 100
    readout_dimension = len(char2code)

    # Build bricks
    encoder = Bidirectional(SimpleRecurrent(dim=dimension, activation=Tanh()),
                            weights_init=Orthogonal())
    fork = Fork(
        [name for name in encoder.prototype.apply.sequences if name != 'mask'],
        weights_init=IsotropicGaussian(0.1),
        biases_init=Constant(0))
    fork.input_dim = dimension
    fork.output_dims = {name: dimension for name in fork.input_names}
    lookup = LookupTable(readout_dimension,
                         dimension,
                         weights_init=IsotropicGaussian(0.1))
    transition = SimpleRecurrent(activation=Tanh(),
                                 dim=dimension,
                                 name="transition")
    attention = SequenceContentAttention(state_names=transition.apply.states,
                                         sequence_dim=2 * dimension,
                                         match_dim=dimension,
                                         name="attention")
    readout = LinearReadout(readout_dim=readout_dimension,
                            source_names=["states"],
                            emitter=SoftmaxEmitter(name="emitter"),
                            feedbacker=LookupFeedback(readout_dimension,
                                                      dimension),
                            name="readout")
    generator = SequenceGenerator(readout=readout,
                                  transition=transition,
                                  attention=attention,
                                  weights_init=IsotropicGaussian(0.1),
                                  biases_init=Constant(0),
                                  name="generator")
    generator.push_initialization_config()
    transition.weights_init = Orthogonal()

    if mode == "train":
        # Data processing pipeline
        dataset_options = dict(dictionary=char2code,
                               level="character",
                               preprocess=_lower)
        if data_path:
            dataset = TextFile(data_path, **dataset_options)
        else:
            dataset = OneBillionWord("training", [99], **dataset_options)
        data_stream = DataStreamMapping(
            mapping=_transpose,
            data_stream=PaddingDataStream(
                BatchDataStream(
                    iteration_scheme=ConstantScheme(10),
                    data_stream=DataStreamMapping(
                        mapping=reverse_words,
                        add_sources=("targets", ),
                        data_stream=DataStreamFilter(
                            predicate=_filter_long,
                            data_stream=dataset.get_default_stream())))))

        # Build the cost computation graph
        chars = tensor.lmatrix("features")
        chars_mask = tensor.matrix("features_mask")
        targets = tensor.lmatrix("targets")
        targets_mask = tensor.matrix("targets_mask")
        batch_cost = generator.cost(
            targets,
            targets_mask,
            attended=encoder.apply(**dict_union(fork.apply(
                lookup.lookup(chars), return_dict=True),
                                                mask=chars_mask)),
            attended_mask=chars_mask).sum()
        batch_size = named_copy(chars.shape[1], "batch_size")
        cost = aggregation.mean(batch_cost, batch_size)
        cost.name = "sequence_log_likelihood"
        logger.info("Cost graph is built")

        # Give an idea of what's going on
        model = Model(cost)
        params = model.get_params()
        logger.info("Parameters:\n" +
                    pprint.pformat([(key, value.get_value().shape)
                                    for key, value in params.items()],
                                   width=120))

        # Initialize parameters
        for brick in model.get_top_bricks():
            brick.initialize()

        # Fetch variables useful for debugging
        max_length = named_copy(chars.shape[0], "max_length")
        cost_per_character = named_copy(
            aggregation.mean(batch_cost, batch_size * max_length),
            "character_log_likelihood")
        cg = ComputationGraph(cost)
        (energies, ) = VariableFilter(application=readout.readout,
                                      name="output")(cg.variables)
        min_energy = named_copy(energies.min(), "min_energy")
        max_energy = named_copy(energies.max(), "max_energy")
        (activations, ) = VariableFilter(
            application=generator.transition.apply,
            name="states")(cg.variables)
        mean_activation = named_copy(
            abs(activations).mean(), "mean_activation")

        # Define the training algorithm.
        algorithm = GradientDescent(cost=cost,
                                    step_rule=CompositeRule(
                                        [StepClipping(10.0),
                                         Scale(0.01)]))

        # More variables for debugging
        observables = [
            cost, min_energy, max_energy, mean_activation, batch_size,
            max_length, cost_per_character, algorithm.total_step_norm,
            algorithm.total_gradient_norm
        ]
        for name, param in params.items():
            observables.append(named_copy(param.norm(2), name + "_norm"))
            observables.append(
                named_copy(algorithm.gradients[param].norm(2),
                           name + "_grad_norm"))

        # Construct the main loop and start training!
        average_monitoring = TrainingDataMonitoring(observables,
                                                    prefix="average",
                                                    every_n_batches=10)
        main_loop = MainLoop(
            model=model,
            data_stream=data_stream,
            algorithm=algorithm,
            extensions=[
                Timing(),
                TrainingDataMonitoring(observables, after_every_batch=True),
                average_monitoring,
                FinishAfter(after_n_batches=num_batches).add_condition(
                    "after_batch", _is_nan),
                Plot(os.path.basename(save_path),
                     [[average_monitoring.record_name(cost)],
                      [average_monitoring.record_name(cost_per_character)]],
                     every_n_batches=10),
                SerializeMainLoop(save_path,
                                  every_n_batches=500,
                                  save_separately=["model", "log"]),
                Printing(every_n_batches=1)
            ])
        main_loop.run()
    elif mode == "test":
        logger.info("Model is loaded")
        chars = tensor.lmatrix("features")
        generated = generator.generate(
            n_steps=3 * chars.shape[0],
            batch_size=chars.shape[1],
            attended=encoder.apply(**dict_union(
                fork.apply(lookup.lookup(chars), return_dict=True))),
            attended_mask=tensor.ones(chars.shape))
        model = Model(generated)
        model.set_param_values(load_parameter_values(save_path))
        sample_function = model.get_theano_function()
        logging.info("Sampling function is compiled")

        while True:
            # Python 2-3 compatibility
            line = input("Enter a sentence\n")
            batch_size = int(input("Enter a number of samples\n"))
            encoded_input = [
                char2code.get(char, char2code["<UNK>"])
                for char in line.lower().strip()
            ]
            encoded_input = ([char2code['<S>']] + encoded_input +
                             [char2code['</S>']])
            print("Encoder input:", encoded_input)
            target = reverse_words((encoded_input, ))[0]
            print("Target: ", target)
            states, samples, glimpses, weights, costs = sample_function(
                numpy.repeat(numpy.array(encoded_input)[:, None],
                             batch_size,
                             axis=1))

            messages = []
            for i in range(samples.shape[1]):
                sample = list(samples[:, i])
                try:
                    true_length = sample.index(char2code['</S>']) + 1
                except ValueError:
                    true_length = len(sample)
                sample = sample[:true_length]
                cost = costs[:true_length, i].sum()
                message = "({})".format(cost)
                message += "".join(code2char[code] for code in sample)
                if sample == target:
                    message += " CORRECT!"
                messages.append((cost, message))
            messages.sort(key=operator.itemgetter(0), reverse=True)
            for _, message in messages:
                print(message)
Example #26
def test_sequence_generator_with_lm():
    floatX = theano.config.floatX
    rng = numpy.random.RandomState(1234)

    readout_dim = 5
    feedback_dim = 3
    dim = 20
    batch_size = 30
    n_steps = 10

    transition = GatedRecurrent(dim=dim,
                                activation=Tanh(),
                                weights_init=Orthogonal())
    language_model = SequenceGenerator(Readout(
        readout_dim=readout_dim,
        source_names=["states"],
        emitter=SoftmaxEmitter(theano_seed=1234),
        feedback_brick=LookupFeedback(readout_dim, dim, name='feedback')),
                                       SimpleRecurrent(dim, Tanh()),
                                       name='language_model')
    generator = SequenceGenerator(Readout(
        readout_dim=readout_dim,
        source_names=["states", "lm_states"],
        emitter=SoftmaxEmitter(theano_seed=1234),
        feedback_brick=LookupFeedback(readout_dim, feedback_dim)),
                                  transition,
                                  language_model=language_model,
                                  weights_init=IsotropicGaussian(0.1),
                                  biases_init=Constant(0),
                                  seed=1234)
    generator.initialize()

    # Test 'cost_matrix' method
    y = tensor.lmatrix('y')
    y.tag.test_value = numpy.zeros((15, batch_size), dtype='int64')
    mask = tensor.matrix('mask')
    mask.tag.test_value = numpy.ones((15, batch_size))

    costs = generator.cost_matrix(y, mask)
    assert costs.ndim == 2
    costs_fun = theano.function([y, mask], [costs])
    y_test = rng.randint(readout_dim, size=(n_steps, batch_size))
    m_test = numpy.ones((n_steps, batch_size), dtype=floatX)
    costs_val = costs_fun(y_test, m_test)[0]
    assert costs_val.shape == (n_steps, batch_size)
    assert_allclose(costs_val.sum(), 483.153, rtol=1e-5)

    # Test 'cost' method
    cost = generator.cost(y, mask)
    assert cost.ndim == 0
    cost_val = theano.function([y, mask], cost)(y_test, m_test)
    assert_allclose(cost_val, 16.105, rtol=1e-5)

    # Test 'AUXILIARY' variable 'per_sequence_element' in 'cost' method
    cg = ComputationGraph([cost])
    var_filter = VariableFilter(roles=[AUXILIARY])
    aux_var_name = '_'.join(
        [generator.name, generator.cost.name, 'per_sequence_element'])
    cost_per_el = [
        el for el in var_filter(cg.variables) if el.name == aux_var_name
    ][0]
    assert cost_per_el.ndim == 0
    cost_per_el_val = theano.function([y, mask], [cost_per_el])(y_test, m_test)
    assert_allclose(cost_per_el_val, 1.61051, rtol=1e-5)

    # Test generate
    states, outputs, lm_states, costs = generator.generate(
        iterate=True, batch_size=batch_size, n_steps=n_steps)
    cg = ComputationGraph([states, outputs, costs])
    states_val, outputs_val, costs_val = theano.function(
        [], [states, outputs, costs], updates=cg.updates)()
    assert states_val.shape == (n_steps, batch_size, dim)
    assert outputs_val.shape == (n_steps, batch_size)
    assert outputs_val.dtype == 'int64'
    assert costs_val.shape == (n_steps, batch_size)
    assert_allclose(states_val.sum(), -4.88367, rtol=1e-5)
    assert_allclose(costs_val.sum(), 486.681, rtol=1e-5)
    assert outputs_val.sum() == 627

    # Test masks agnostic results of cost
    cost1 = costs_fun([[1], [2]], [[1], [1]])[0]
    cost2 = costs_fun([[3, 1], [4, 2], [2, 0]], [[1, 1], [1, 1], [1, 0]])[0]
    assert_allclose(cost1.sum(), cost2[:, 1].sum(), rtol=1e-5)
Example #27
    def setUp(self):
        self.simple = SimpleRecurrent(dim=3, weights_init=Constant(2),
                                      activation=Tanh())
        self.simple.initialize()
Example #28
def test_sequence_generator():
    """Test a sequence generator with no contexts and continuous outputs.

    Such sequence generators can be used to model e.g. dynamical systems.

    """
    rng = numpy.random.RandomState(1234)

    output_dim = 1
    dim = 20
    batch_size = 30
    n_steps = 10

    transition = SimpleRecurrent(activation=Tanh(), dim=dim,
                                 weights_init=Orthogonal())
    generator = SequenceGenerator(
        Readout(readout_dim=output_dim, source_names=["states"],
                emitter=TestEmitter()),
        transition,
        weights_init=IsotropicGaussian(0.1), biases_init=Constant(0.0),
        seed=1234)
    generator.initialize()

    # Test 'cost_matrix' method
    y = tensor.tensor3('y')
    mask = tensor.matrix('mask')
    costs = generator.cost_matrix(y, mask)
    assert costs.ndim == 2
    y_test = rng.uniform(size=(n_steps, batch_size, output_dim)).astype(floatX)
    m_test = numpy.ones((n_steps, batch_size), dtype=floatX)
    costs_val = theano.function([y, mask], [costs])(y_test, m_test)[0]
    assert costs_val.shape == (n_steps, batch_size)
    assert_allclose(costs_val.sum(), 115.593, rtol=1e-5)

    # Test 'cost' method
    cost = generator.cost(y, mask)
    assert cost.ndim == 0
    cost_val = theano.function([y, mask], [cost])(y_test, m_test)
    assert_allclose(cost_val, 3.8531, rtol=1e-5)

    # Test 'AUXILIARY' variable 'per_sequence_element' in 'cost' method
    cg = ComputationGraph([cost])
    var_filter = VariableFilter(roles=[AUXILIARY])
    aux_var_name = '_'.join([generator.name, generator.cost.name,
                             'per_sequence_element'])
    cost_per_el = [el for el in var_filter(cg.variables)
                   if el.name == aux_var_name][0]
    assert cost_per_el.ndim == 0
    cost_per_el_val = theano.function([y, mask], [cost_per_el])(y_test, m_test)
    assert_allclose(cost_per_el_val, 0.38531, rtol=1e-5)

    # Test 'generate' method
    states, outputs, costs = [variable.eval() for variable in
                              generator.generate(
                                  states=rng.uniform(
                                      size=(batch_size, dim)).astype(floatX),
                                  iterate=True, batch_size=batch_size,
                                  n_steps=n_steps)]
    assert states.shape == (n_steps, batch_size, dim)
    assert outputs.shape == (n_steps, batch_size, output_dim)
    assert costs.shape == (n_steps, batch_size)
    assert_allclose(outputs.sum(), -0.33683, rtol=1e-5)
    assert_allclose(states.sum(), 15.7909, rtol=1e-5)
    # There is no generation cost in this case, since generation is
    # deterministic
    assert_allclose(costs.sum(), 0.0)
Ejemplo n.º 29
def main(num_epochs=100):
    x = tensor.matrix('features')
    m = tensor.matrix('features_mask')

    x_int = x.astype(dtype='int32').T
    train_dataset = TextFile('inspirational.txt')
    train_dataset.indexables[0] = numpy.array(sorted(
        train_dataset.indexables[0], key=len
    ))

    n_voc = len(train_dataset.dict.keys())

    # Count how often each word appears sentence-initially
    init_probs = numpy.array(
        [len(filter(lambda idx: idx == w,
                    [s[0] for s in train_dataset.indexables[
                        train_dataset.sources.index('features')]]
                    )) for w in xrange(n_voc)],
        dtype=theano.config.floatX
    )
    init_probs = init_probs / init_probs.sum()

    n_h = 100
    linear_embedding = LookupTable(
        length=n_voc,
        dim=n_h,
        weights_init=Uniform(std=0.01),
        biases_init=Constant(0.)
    )
    linear_embedding.initialize()
    # Leftover from an LSTM variant; unused by the SimpleRecurrent below.
    lstm_biases = numpy.zeros(4 * n_h).astype(dtype=theano.config.floatX)
    lstm_biases[n_h:(2 * n_h)] = 4.
    rnn = SimpleRecurrent(
        dim=n_h,
        activation=Tanh(),
        weights_init=Uniform(std=0.01),
        biases_init=Constant(0.)
    )
    rnn.initialize()
    score_layer = Linear(
        input_dim=n_h,
        output_dim=n_voc,
        weights_init=Uniform(std=0.01),
        biases_init=Constant(0.)
    )
    score_layer.initialize()

    embedding = (linear_embedding.apply(x_int[:-1])
                 * tensor.shape_padright(m.T[1:]))
    rnn_out = rnn.apply(inputs=embedding, mask=m.T[1:])
    probs = softmax(
        sequence_map(score_layer.apply, rnn_out, mask=m.T[1:])[0]
    )
    idx_mask = m.T[1:].nonzero()
    cost = CategoricalCrossEntropy().apply(
        x_int[1:][idx_mask[0], idx_mask[1]],
        probs[idx_mask[0], idx_mask[1]]
    )
    cost.name = 'cost'
    misclassification = MisclassificationRate().apply(
        x_int[1:][idx_mask[0], idx_mask[1]],
        probs[idx_mask[0], idx_mask[1]]
    )
    misclassification.name = 'misclassification'

    cg = ComputationGraph([cost])
    params = cg.parameters

    algorithm = GradientDescent(
        cost=cost,
        params=params,
        step_rule=Adam()
    )

    train_data_stream = Padding(
        data_stream=DataStream(
            dataset=train_dataset,
            iteration_scheme=BatchwiseShuffledScheme(
                examples=train_dataset.num_examples,
                batch_size=10,
            )
        ),
        mask_sources=('features',)
    )

    model = Model(cost)

    extensions = []
    extensions.append(Timing())
    extensions.append(FinishAfter(after_n_epochs=num_epochs))
    extensions.append(TrainingDataMonitoring(
        [cost, misclassification],
        prefix='train',
        after_epoch=True))

    batch_size = 10
    length = 30
    trng = MRG_RandomStreams(18032015)
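    # Gumbel-max trick: adding Gumbel(0, 1) noise to the log-probabilities
    # and taking the argmax draws exact samples from the categorical
    # distribution.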
    u = trng.uniform(size=(length, batch_size, n_voc))
    gumbel_noise = -tensor.log(-tensor.log(u))
    init_samples = (tensor.log(init_probs).dimshuffle(('x', 0))
                    + gumbel_noise[0]).argmax(axis=-1)
    init_states = rnn.initial_state('states', batch_size)

    def sampling_step(g_noise, states, samples_step):
        embedding_step = linear_embedding.apply(samples_step)
        next_states = rnn.apply(inputs=embedding_step,
                                states=states,
                                iterate=False)
        probs_step = softmax(score_layer.apply(next_states))
        next_samples = (tensor.log(probs_step)
                        + g_noise).argmax(axis=-1)

        return next_states, next_samples

    [_, samples], _ = theano.scan(
        fn=sampling_step,
        sequences=[gumbel_noise[1:]],
        outputs_info=[init_states, init_samples]
    )

    sampling = theano.function([], samples.owner.inputs[0].T)

    plotters = []
    plotters.append(Plotter(
        channels=[['train_cost', 'train_misclassification']],
        titles=['Costs']))

    extensions.append(PlotManager('Language modelling example',
                                  plotters=plotters,
                                  after_epoch=True,
                                  after_training=True))
    extensions.append(Printing())
    extensions.append(PrintSamples(sampler=sampling,
                                   voc=train_dataset.inv_dict))

    main_loop = MainLoop(model=model,
                         data_stream=train_data_stream,
                         algorithm=algorithm,
                         extensions=extensions)

    main_loop.run()
Ejemplo n.º 30
so let's think about sizes of the arrays...
"""

x = tensor.matrix('tokens', dtype="int32")

x_mask = tensor.matrix('tokens_mask', dtype=floatX)
#rnn.apply(inputs=input_to_hidden.apply(x), mask=x_mask)

lookup = LookupTable(vocab_size, embedding_dim)

x_extra = tensor.tensor3('extras', dtype=floatX)

rnn = Bidirectional(
    SimpleRecurrent(
        dim=hidden_dim,
        activation=Tanh(),
        weights_init=IsotropicGaussian(0.01),
        biases_init=Constant(0),
    ), )

### Will need to reshape the rnn outputs to produce suitable input here...
gather = Linear(name='hidden_to_output',
                input_dim=hidden_dim * 2,
                output_dim=labels_size,
                weights_init=IsotropicGaussian(0.01),
                biases_init=Constant(0))

p_labels = Softmax()

## Let's initialize the variables
lookup.allocate()
#print("lookup.parameters=", lookup.parameters)                         # ('lookup.parameters=', [W])
Ejemplo n.º 31
floatX = theano.config.floatX

n_epochs = 30
x_dim = 1
h_dim = 100
o_dim = 10
batch_size = 50

print 'Building model ...'
# T x B x F
x = tensor.tensor3('x', dtype=floatX)
y = tensor.tensor3('y', dtype='int32')

x_to_h1 = Linear(name='x_to_h1', input_dim=x_dim, output_dim=h_dim)
pre_rnn = x_to_h1.apply(x)
rnn = SimpleRecurrent(activation=Rectifier(), dim=h_dim, name="rnn")
h1 = rnn.apply(pre_rnn)
h1_to_o = Linear(name='h1_to_o', input_dim=h_dim, output_dim=o_dim)
pre_softmax = h1_to_o.apply(h1)
softmax = Softmax()
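# Softmax in Blocks expects a 2-D input, so collapse the time and batch axes,
# apply it, and restore the original (T, B, o_dim) shape afterwards.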
shape = pre_softmax.shape
softmax_out = softmax.apply(pre_softmax.reshape((-1, o_dim)))
softmax_out = softmax_out.reshape(shape)
softmax_out.name = 'softmax_out'

# comparing only last time-step
cost = CategoricalCrossEntropy().apply(y[-1, :, 0], softmax_out[-1])
cost.name = 'CrossEntropy'
error_rate = MisclassificationRate().apply(y[-1, :, 0], softmax_out[-1])
error_rate.name = 'error_rate'
Ejemplo n.º 32
def rnn_layer(dim, h, n):
    linear = Linear(input_dim=dim, output_dim=dim, name='linear' + str(n))
    rnn = SimpleRecurrent(dim=dim, activation=Tanh(), name='rnn' + str(n))
    initialize([linear, rnn])
    return rnn.apply(linear.apply(h))
Ejemplo n.º 33
def main(model_path, recurrent_type):
    dataset_options = dict(dictionary=char2code, level="character",
                           preprocess=_lower)
    dataset = OneBillionWord("training", [99], **dataset_options)
    data_stream = dataset.get_example_stream()
    data_stream = Filter(data_stream, _filter_long)
    data_stream = Mapping(data_stream, _make_target,
                          add_sources=('target',))
    data_stream = Batch(data_stream, iteration_scheme=ConstantScheme(100))
    data_stream = Padding(data_stream)
    data_stream = Mapping(data_stream, _transpose)

    features = tensor.lmatrix('features')
    features_mask = tensor.matrix('features_mask')
    target = tensor.lmatrix('target')
    target_mask = tensor.matrix('target_mask')

    dim = 100
    lookup = LookupTable(len(all_chars), dim,
                         weights_init=IsotropicGaussian(0.01),
                         biases_init=Constant(0.))

    if recurrent_type == 'lstm':
        rnn = LSTM(dim / 4, Tanh(),
                   weights_init=IsotropicGaussian(0.01),
                   biases_init=Constant(0.))
    elif recurrent_type == 'simple':
        rnn = SimpleRecurrent(dim, Tanh())
        rnn = Bidirectional(rnn,
                            weights_init=IsotropicGaussian(0.01),
                            biases_init=Constant(0.))
    else:
        raise ValueError('Unknown RNN type')
    rnn.initialize()
    lookup.initialize()
    y_hat = rnn.apply(lookup.apply(features), mask=features_mask)

    print len(all_chars)
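    # The bidirectional RNN concatenates forward and backward states, hence
    # the 2 * dim input dimension of the output layer.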
    linear = Linear(2 * dim, len(all_chars),
                    weights_init=IsotropicGaussian(0.01),
                    biases_init=Constant(0.))
    linear.initialize()
    y_hat = linear.apply(y_hat)
    seq_length = y_hat.shape[0]
    batch_size = y_hat.shape[1]
    y_hat = Softmax().apply(
        y_hat.reshape((seq_length * batch_size, -1))).reshape(y_hat.shape)
    cost = CategoricalCrossEntropy().apply(
        target.flatten(),
        y_hat.reshape((-1, len(all_chars)))) * seq_length * batch_size
    cost.name = 'cost'
    cost_per_character = cost / features_mask.sum()
    cost_per_character.name = 'cost_per_character'

    cg = ComputationGraph([cost, cost_per_character])
    model = Model(cost)
    algorithm = GradientDescent(step_rule=Adam(), cost=cost,
                                params=cg.parameters)

    train_monitor = TrainingDataMonitoring(
        [cost, cost_per_character], prefix='train',
        after_batch=True)
    extensions = [train_monitor, Printing(every_n_batches=40),
                  Dump(model_path, every_n_batches=200),
                  #Checkpoint('rnn.pkl', every_n_batches=200)
                  ]
    main_loop = MainLoop(model=model, algorithm=algorithm,
                         data_stream=data_stream, extensions=extensions)
    main_loop.run()
Ejemplo n.º 34
    dim=hidden_layer_dim,
    weights_init=initialization.Uniform(width=0.01),
    biases_init=Constant(0))
lookup_input.initialize()

linear_input = Linear(
    name='linear_input',
    input_dim=hidden_layer_dim,
    output_dim=hidden_layer_dim,
    weights_init=initialization.Uniform(width=0.01),
    biases_init=Constant(0))
linear_input.initialize()

rnn = SimpleRecurrent(
    name='hidden',
    dim=hidden_layer_dim,
    activation=Tanh(),
    weights_init=initialization.Uniform(width=0.01))
rnn.initialize()

linear_output = Linear(
    name='linear_output',
    input_dim=hidden_layer_dim,
    output_dim=charset_size,
    weights_init=initialization.Uniform(width=0.01),
    biases_init=Constant(0))
linear_output.initialize()

softmax = NDimensionalSoftmax(name='ndim_softmax')

activation_input = lookup_input.apply(x)
Ejemplo n.º 35
        inputs_mask = numpy.max(data[b'mask_inputs'], axis=-1)
        labels_mask = data[b'mask_labels']

print('Building model ...')
# T x B x F
x = tensor.tensor3('x', dtype=floatX)
# T x B
x_mask = tensor.matrix('x_mask', dtype=floatX)
# L x B
y = tensor.matrix('y', dtype=floatX)
# L x B
y_mask = tensor.matrix('y_mask', dtype=floatX)

x_to_h = Linear(name='x_to_h', input_dim=x_dim, output_dim=h_dim)
x_transform = x_to_h.apply(x)
rnn = SimpleRecurrent(activation=Tanh(), dim=h_dim, name="rnn")
h = rnn.apply(x_transform)
h_to_o = Linear(name='h_to_o', input_dim=h_dim, output_dim=num_classes + 1)
h_transform = h_to_o.apply(h)
# T x B x C+1
y_hat = tensor.nnet.softmax(h_transform.reshape(
    (-1, num_classes + 1))).reshape((h.shape[0], h.shape[1], -1))
y_hat.name = 'y_hat'

y_hat_mask = x_mask
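# The network emits one prediction per input frame, so the prediction mask
# equals the input mask; CTC below aligns the T frames with the L labels.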
cost = CTC().apply(y, y_hat, y_mask, y_hat_mask, 'normal_scale')
cost.name = 'CTC'
# Initialization
for brick in (rnn, x_to_h, h_to_o):
    brick.weights_init = IsotropicGaussian(0.01)
    brick.biases_init = Constant(0)
Ejemplo n.º 36
x = tensor.tensor3('features')
y = tensor.matrix('targets')

n_batchs = 1000
h_dim = 2
x_dim = 2

encode = Linear(name='encode', input_dim=x_dim, output_dim=h_dim)

gates = Linear(name='gates', input_dim=x_dim, output_dim=2 * h_dim)

#lstm = LSTM(activation=Tanh(),
#            dim=h_dim, name="lstm")

lstm = SimpleRecurrent(dim=h_dim, activation=Tanh())

#lstm = GatedRecurrent(dim=h_dim,
#                      activation=Tanh())

decode = Linear(name='decode', input_dim=h_dim, output_dim=1)

for brick in (encode, gates, decode):
    brick.weights_init = IsotropicGaussian(0.01)
    brick.biases_init = Constant(0.)
    brick.initialize()

lstm.weights_init = IsotropicGaussian(0.01)
#lstm.weights_init = Orthogonal()
lstm.biases_init = Constant(0.)
lstm.initialize()
Ejemplo n.º 37
n_batchs = 1000
h_dim = 2
x_dim = 2

encode = Linear(name='encode',
                input_dim=x_dim,
                output_dim=h_dim)

gates = Linear(name='gates',
               input_dim=x_dim,
               output_dim=2 * h_dim)

#lstm = LSTM(activation=Tanh(),
#            dim=h_dim, name="lstm")

lstm = SimpleRecurrent(dim=h_dim,
                       activation=Tanh())

#lstm = GatedRecurrent(dim=h_dim,
#                      activation=Tanh())

decode = Linear(name='decode',
                input_dim=h_dim,
                output_dim=1)

for brick in (encode, gates, decode):
    brick.weights_init = IsotropicGaussian(0.01)
    brick.biases_init = Constant(0.)
    brick.initialize()

lstm.weights_init = IsotropicGaussian(0.01)
#lstm.weights_init = Orthogonal()
Ejemplo n.º 38
    def __init__(self, dim, **kwargs):
        super(MyRnn, self).__init__(**kwargs)
        self.dim = dim
        self.layer1 = SimpleRecurrent(
            dim=self.dim, activation=Identity(), name='recurrent layer 1',
            weights_init=initialization.Identity())
        self.layer2 = SimpleRecurrent(
            dim=self.dim, activation=Identity(), name='recurrent layer 2',
            weights_init=initialization.Identity())
        self.children = [self.layer1, self.layer2]
Ejemplo n.º 39
class EUTHM(UTHM):
    '''
    UTH model with extended information
    '''
    def __init__(self, config, dataset, *args, **kwargs):
        super(EUTHM, self).__init__(config, dataset)

    def _define_inputs(self, *args, **kwargs):
        super(EUTHM, self)._define_inputs()
        self.user_word = tensor.ivector('user_word')
        self.user_word_sparse_mask = tensor.vector('user_word_sparse_mask',
                                                   dtype=theano.config.floatX)
        self.user_word_left_idx = tensor.ivector('user_word_idx_left_idx')
        self.user_word_right_idx = tensor.ivector('user_word_idx_right_idx')
        self.hashtag_word = tensor.ivector('hashtag_word')
        self.hashtag_sparse_mask = tensor.vector('hashtag_word_sparse_mask',
                                                 dtype=theano.config.floatX)
        self.hashtag_word_left_idx = tensor.ivector(
            'hashtag_word_idx_left_idx')
        self.hashtag_word_right_idx = tensor.ivector(
            'hashtag_word_idx_right_idx')
        self.sparse_word = tensor.imatrix('sparse_word')
        self.sparse_word_sparse_mask = tensor.vector(
            'sparse_word_sparse_mask', dtype=theano.config.floatX)
        self.sparse_word_mask = tensor.matrix('sparse_word_mask',
                                              dtype=theano.config.floatX)
        self.sparse_word_left_idx = tensor.ivector('sparse_word_idx_left_idx')
        self.sparse_word_right_idx = tensor.ivector(
            'sparse_word_idx_right_idx')

    def _build_bricks(self, *args, **kwargs):
        # Build lookup tables
        super(EUTHM, self)._build_bricks()
        self.user2word = MLP(
            activations=[Tanh('user2word_tanh')],
            dims=[self.config.user_embed_dim, self.config.word_embed_dim],
            name='user2word_mlp')
        self.user2word.weights_init = IsotropicGaussian(
            std=1 / numpy.sqrt(self.config.word_embed_dim))
        self.user2word.biases_init = Constant(0)
        self.user2word.initialize()
        self.hashtag2word = MLP(
            activations=[Tanh('hashtag2word_tanh')],
            dims=[
                self.config.user_embed_dim + self.config.word_embed_dim,
                self.config.word_embed_dim
            ],
            name='hashtag2word_mlp')
        self.hashtag2word.weights_init = IsotropicGaussian(
            std=1 / numpy.sqrt(self.config.word_embed_dim))
        self.hashtag2word.biases_init = Constant(0)
        self.hashtag2word.initialize()
        self.user2word_bias = Bias(dim=1, name='user2word_bias')
        self.user2word_bias.biases_init = Constant(0)
        self.user2word_bias.initialize()
        self.hashtag2word_bias = Bias(dim=1, name='hashtag2word_bias')
        self.hashtag2word_bias.biases_init = Constant(0)
        self.hashtag2word_bias.initialize()
        #Build character embedding
        self.char_embed = self._embed(len(self.dataset.char2index),
                                      self.config.char_embed_dim,
                                      name='char_embed')
        # Build sparse word encoder
        self.rnn_ins = Linear(input_dim=self.config.char_embed_dim,
                              output_dim=self.config.word_embed_dim,
                              name='rnn_in')
        self.rnn_ins.weights_init = IsotropicGaussian(
            std=numpy.sqrt(2) / numpy.sqrt(self.config.char_embed_dim +
                                           self.config.word_embed_dim))
        self.rnn_ins.biases_init = Constant(0)
        self.rnn_ins.initialize()
        self.rnn = SimpleRecurrent(dim=self.config.word_embed_dim,
                                   activation=Tanh())
        self.rnn.weights_init = IsotropicGaussian(
            std=1 / numpy.sqrt(self.config.word_embed_dim))
        self.rnn.initialize()

    def _set_OV_value(self, *args, **kwargs):
        '''Train a <unk> representation'''
        # Zero the <unk> row of the character embedding; set_subtensor is
        # symbolic, so the shared weights are updated in place instead.
        unk_w = self.char_embed.W.get_value()
        unk_w[self.dataset.char2index['<unk>']] = 0.
        self.char_embed.W.set_value(unk_w)

    def _get_text_vec(self, *args, **kwargs):
        # Transpose text
        self.text = self.text.dimshuffle(1, 0)
        self.text_mask = self.text_mask.dimshuffle(1, 0)
        self.sparse_word = self.sparse_word.dimshuffle(1, 0)
        self.sparse_word_mask = self.sparse_word_mask.dimshuffle(1, 0)
        # Turn word, user and hashtag into vector representation
        text_vec = self.word_embed.apply(self.text)
        # Apply user word, hashtag word and url
        text_vec = self._apply_user_word(text_vec)
        text_vec = self._apply_hashtag_word(text_vec)
        text_vec = self._apply_sparse_word(text_vec)
        return text_vec

    @abstractmethod
    def _apply_user_word(self, text_vec, *args, **kwargs):
        '''
        Replace @a with transformed author vector
        :param text_vec:
        :param args:
        :param kwargs:
        :return:
        '''
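        # Where the sparse mask is 1, substitute the transformed user vector;
        # elsewhere keep the original word embedding.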
        user_word_vec = self.user2word.apply(self.user_embed.apply(self.user_word)) + \
                        self.user2word_bias.parameters[0][0]
        text_vec = tensor.set_subtensor(
            text_vec[self.user_word_right_idx, self.user_word_left_idx],
            text_vec[self.user_word_right_idx, self.user_word_left_idx] *
            (1 - self.user_word_sparse_mask[:, None]) +
            user_word_vec * self.user_word_sparse_mask[:, None])
        return text_vec

    @abstractmethod
    def _apply_hashtag_word(self, text_vec, *args, **kwargs):
        '''
        Replace #h with transformed hashtag vector
        :param text_vec:
        :param args:
        :param kwargs:
        :return:
        '''
        hashtag_word_vec = self.hashtag2word.apply(self.hashtag_embed.apply(self.hashtag_word)) +\
                           self.hashtag2word_bias.parameters[0][0]
        text_vec = tensor.set_subtensor(
            text_vec[self.hashtag_word_right_idx, self.hashtag_word_left_idx],
            text_vec[self.hashtag_word_right_idx, self.hashtag_word_left_idx] *
            (1 - self.hashtag_sparse_mask[:, None]) +
            hashtag_word_vec * self.hashtag_sparse_mask[:, None])
        return text_vec

    @abstractmethod
    def _apply_sparse_word(self, text_vec, *args, **kwargs):
        '''
        Replace sparse word encoding with character embedding. (maybe lstm)
        :param text_vec:
        :param args:
        :param kwargs:
        :return:
        '''
        sparse_word_vec = self.char_embed.apply(self.sparse_word)
        sparse_word_hiddens = self.rnn.apply(
            inputs=self.rnn_ins.apply(sparse_word_vec),
            mask=self.sparse_word_mask)
        tmp = sparse_word_hiddens[-1]
        text_vec = tensor.set_subtensor(
            text_vec[self.sparse_word_right_idx, self.sparse_word_left_idx],
            text_vec[self.sparse_word_right_idx, self.sparse_word_left_idx] *
            (1 - self.sparse_word_sparse_mask[:, None]) +
            tmp * self.sparse_word_sparse_mask[:, None])
        return text_vec
Ejemplo n.º 40
def rnn_layer(dim, h, n):
    linear = Linear(input_dim=dim, output_dim=dim, name='linear' + str(n))
    rnn = SimpleRecurrent(dim=dim, activation=Tanh(), name='rnn' + str(n))
    initialize([linear, rnn])
    return rnn.apply(linear.apply(h))
Ejemplo n.º 41
class ETHM(EUTHM):
    '''Model with only textual-hashtag information'''
    def __init__(self, config, dataset, *args, **kwargs):
        super(ETHM, self).__init__(config, dataset)

    def _build_model(self, *args, **kwargs):
        # Define inputs
        self._define_inputs()
        self._build_bricks()
        self._set_OV_value()
        # Transpose text
        self.text = self.text.dimshuffle(1, 0)
        self.text_mask = self.text_mask.dimshuffle(1, 0)
        self.sparse_word = self.sparse_word.dimshuffle(1, 0)
        self.sparse_word_mask = self.sparse_word_mask.dimshuffle(1, 0)
        # Turn word, and hashtag into vector representation
        text_vec = self.word_embed.apply(self.text)
        # Apply word and hashtag word and url
        text_vec = self._apply_hashtag_word(text_vec)
        text_vec = self._apply_sparse_word(text_vec)
        # Encode text
        mlstm_hidden, mlstm_cell = self.mlstm.apply(
            inputs=self.mlstm_ins.apply(text_vec),
            mask=self.text_mask.astype(theano.config.floatX))
        text_encodes = mlstm_hidden[-1]
        input_vec = text_encodes
        self._get_cost(input_vec, None, None)

    def _define_inputs(self, *args, **kwargs):
        self.hashtag = tensor.ivector('hashtag')
        self.text = tensor.imatrix('text')
        self.text_mask = tensor.matrix('text_mask', dtype=theano.config.floatX)
        self.hashtag_word = tensor.ivector('hashtag_word')
        self.hashtag_sparse_mask = tensor.vector('hashtag_word_sparse_mask',
                                                 dtype=theano.config.floatX)
        self.hashtag_word_left_idx = tensor.ivector(
            'hashtag_word_idx_left_idx')
        self.hashtag_word_right_idx = tensor.ivector(
            'hashtag_word_idx_right_idx')
        self.sparse_word = tensor.imatrix('sparse_word')
        self.sparse_word_sparse_mask = tensor.vector(
            'sparse_word_sparse_mask', dtype=theano.config.floatX)
        self.sparse_word_mask = tensor.matrix('sparse_word_mask',
                                              dtype=theano.config.floatX)
        self.sparse_word_left_idx = tensor.ivector('sparse_word_idx_left_idx')
        self.sparse_word_right_idx = tensor.ivector(
            'sparse_word_idx_right_idx')

    def _build_bricks(self, *args, **kwargs):
        # Build lookup tables
        self.word_embed = self._embed(len(self.dataset.word2index),
                                      self.config.word_embed_dim,
                                      name='word_embed')

        self.hashtag_embed = self._embed(len(self.dataset.hashtag2index),
                                         self.config.lstm_dim,
                                         name='hashtag_embed')
        # Build text encoder
        self.mlstm_ins = Linear(input_dim=self.config.word_embed_dim,
                                output_dim=4 * self.config.lstm_dim,
                                name='mlstm_in')
        self.mlstm_ins.weights_init = IsotropicGaussian(
            std=numpy.sqrt(2) /
            numpy.sqrt(self.config.word_embed_dim + self.config.lstm_dim))
        self.mlstm_ins.biases_init = Constant(0)
        self.mlstm_ins.initialize()
        self.mlstm = MLSTM(self.config.lstm_time,
                           self.config.lstm_dim,
                           shared=False)
        self.mlstm.weights_init = IsotropicGaussian(
            std=numpy.sqrt(2) /
            numpy.sqrt(self.config.word_embed_dim + self.config.lstm_dim))
        self.mlstm.biases_init = Constant(0)
        self.mlstm.initialize()
        self.hashtag2word = MLP(
            activations=[Tanh('hashtag2word_tanh')],
            dims=[self.config.lstm_dim, self.config.word_embed_dim],
            name='hashtag2word_mlp')
        self.hashtag2word.weights_init = IsotropicGaussian(
            std=1 / numpy.sqrt(self.config.word_embed_dim))
        self.hashtag2word.biases_init = Constant(0)
        self.hashtag2word.initialize()
        self.hashtag2word_bias = Bias(dim=1, name='hashtag2word_bias')
        self.hashtag2word_bias.biases_init = Constant(0)
        self.hashtag2word_bias.initialize()
        #Build character embedding
        self.char_embed = self._embed(len(self.dataset.char2index),
                                      self.config.char_embed_dim,
                                      name='char_embed')
        # Build sparse word encoder
        self.rnn_ins = Linear(input_dim=self.config.char_embed_dim,
                              output_dim=self.config.word_embed_dim,
                              name='rnn_in')
        self.rnn_ins.weights_init = IsotropicGaussian(
            std=numpy.sqrt(2) / numpy.sqrt(self.config.char_embed_dim +
                                           self.config.word_embed_dim))
        self.rnn_ins.biases_init = Constant(0)
        self.rnn_ins.initialize()
        self.rnn = SimpleRecurrent(dim=self.config.word_embed_dim,
                                   activation=Tanh())
        self.rnn.weights_init = IsotropicGaussian(
            std=1 / numpy.sqrt(self.config.word_embed_dim))
        self.rnn.initialize()

    def _apply_dropout(self, outputs, *args, **kwargs):
        variables = [self.word_embed.W, self.hashtag_embed.W]
        cgs = ComputationGraph(outputs)
        cg_dropouts = apply_dropout(cgs,
                                    variables,
                                    drop_prob=self.config.dropout_prob,
                                    seed=123).outputs
        return cg_dropouts

    def _apply_reg(self, cost, params=None, *args, **kwargs):
        if getattr(self.config, 'l2_norm', 0) > 0:
            cost = cost + self.config.l2_norm * theano_expressions.l2_norm(
                tensors=[self.hashtag_embed.W, self.word_embed.W]) ** 2
        return cost
Ejemplo n.º 42
def rnn_layer(in_dim, h, h_dim, n):
    linear = Linear(input_dim=in_dim, output_dim=h_dim, name='linear' + str(n) + h.name)
    rnn = SimpleRecurrent(dim=h_dim, activation=Tanh(), name='rnn' + str(n))
    initialize([linear, rnn])
    return rnn.apply(linear.apply(h))
Ejemplo n.º 43
class Rnn(Initializable, BaseRecurrent):
    def __init__(self, dims=(88, 100, 100), **kwargs):
        super(Rnn, self).__init__(**kwargs)
        self.dims = dims

        self.input_transform = Linear(
            input_dim=dims[0],
            output_dim=dims[1],
            weights_init=IsotropicGaussian(0.01),
            # biases_init=Constant(0.0),
            use_bias=False,
            name="input_transfrom")

        self.gru_layer = SimpleRecurrent(dim=dims[1],
                                         activation=Tanh(),
                                         weights_init=IsotropicGaussian(0.01),
                                         biases_init=Constant(0.0),
                                         use_bias=True,
                                         name="gru_rnn_layer")

        # TODO: find a way to automatically set the output dim in case of lstm vs normal rnn
        self.linear_trans = Linear(input_dim=dims[1],
                                   output_dim=dims[2] * 4,
                                   weights_init=IsotropicGaussian(0.01),
                                   biases_init=Constant(0.0),
                                   use_bias=False,
                                   name="h2h_transform")

        self.lstm_layer = LSTM(dim=dims[2],
                               activation=Tanh(),
                               weights_init=IsotropicGaussian(0.01),
                               biases_init=Constant(0.0),
                               use_bias=True,
                               name="lstm_rnn_layer")

        self.out_transform = MLP(activations=[Sigmoid()],
                                 dims=[dims[2], dims[0]],
                                 weights_init=IsotropicGaussian(0.01),
                                 use_bias=True,
                                 biases_init=Constant(0.0),
                                 name="out_layer")

        self.children = [
            self.input_transform, self.gru_layer, self.linear_trans,
            self.lstm_layer, self.out_transform
        ]

    # @recurrent(sequences=['inputs', 'input_mask'], contexts=[],
    # states=['gru_state', 'lstm_state', 'lstm_cells'],
    # outputs=['gru_state', 'lstm_state', 'lstm_cells'])
    def rnn_apply(self,
                  inputs,
                  mask=None,
                  gru_state=None,
                  lstm_state=None,
                  lstm_cells=None):
        input_transform = self.input_transform.apply(inputs)
        gru_state = self.gru_layer.apply(
            inputs=input_transform,
            # update_inputs=input_transform,
            # reset_inputs=input_transform,
            states=gru_state,
            mask=mask,
            iterate=False)
        lstm_transform = self.linear_trans.apply(gru_state)
        lstm_state, lstm_cells = self.lstm_layer.apply(inputs=lstm_transform,
                                                       states=lstm_state,
                                                       cells=lstm_cells,
                                                       mask=mask,
                                                       iterate=False)
        return gru_state, lstm_state, lstm_cells

    @recurrent(sequences=[],
               contexts=[],
               states=['inputs', 'gru_state', 'lstm_state', 'lstm_cells'],
               outputs=['inputs', 'gru_state', 'lstm_state', 'lstm_cells'])
    def rnn_generate(self,
                     inputs=None,
                     gru_state=None,
                     lstm_state=None,
                     lstm_cells=None):
        # apply() also expects a mask step; an all-ones mask is assumed here
        # (generation has no padding), and the updated states are returned.
        output, gru_state, lstm_state, lstm_cells = self.apply(
            inputs=inputs,
            mask=tensor.ones((inputs.shape[0],)),
            gru_state=gru_state,
            lstm_state=lstm_state,
            lstm_cells=lstm_cells,
            iterate=False)
        return output, gru_state, lstm_state, lstm_cells

    @recurrent(sequences=['inputs', 'mask'],
               contexts=[],
               states=['gru_state', 'lstm_state', 'lstm_cells'],
               outputs=['output', 'gru_state', 'lstm_state', 'lstm_cells'])
    def apply(self,
              inputs,
              mask,
              gru_state=None,
              lstm_state=None,
              lstm_cells=None):
        # input_transform = self.input_transform.apply(inputs)
        # gru_state = self.gru_layer.apply(
        # inputs=input_transform,
        #     mask=mask,
        #     states=gru_state,
        #     iterate=False)
        # lstm_transform = self.linear_trans.apply(gru_state)
        # lstm_state, lstm_cells = self.lstm_layer.apply(inputs=lstm_transform, states=lstm_state,
        #                                                cells=lstm_cells,
        #                                                mask=mask, iterate=False)
        gru_state, lstm_state, lstm_cells = self.rnn_apply(
            inputs=inputs,
            mask=mask,
            gru_state=gru_state,
            lstm_state=lstm_state,
            lstm_cells=lstm_cells)

        output = 1.17 * self.out_transform.apply(lstm_state) * mask[:, None]
        return output, gru_state, lstm_state, lstm_cells

    def get_dim(self, name):
        dims = dict(zip(['outputs', 'gru_state', 'lstm_state'], self.dims))
        dims['lstm_cells'] = dims['lstm_state']
        return dims.get(name, None) or super(Rnn, self).get_dim(name)
Ejemplo n.º 44
lookup_input = LookupTable(name='lookup_input',
                           length=train_dataset.syllables_vocab_size() + 1,
                           dim=hidden_layer_dim,
                           weights_init=initialization.Uniform(width=0.01),
                           biases_init=Constant(0))
lookup_input.initialize()

linear_input = Linear(name='linear_input',
                      input_dim=hidden_layer_dim,
                      output_dim=hidden_layer_dim,
                      weights_init=initialization.Uniform(width=0.01),
                      biases_init=Constant(0))
linear_input.initialize()

rnn = SimpleRecurrent(name='hidden',
                      dim=hidden_layer_dim,
                      activation=Tanh(),
                      weights_init=initialization.Uniform(width=0.01))
rnn.initialize()

linear_output = Linear(name='linear_output',
                       input_dim=hidden_layer_dim,
                       output_dim=train_dataset.durations_vocab_size(),
                       weights_init=initialization.Uniform(width=0.01),
                       biases_init=Constant(0))
linear_output.initialize()

softmax = NDimensionalSoftmax(name='ndim_softmax')

activation_input = lookup_input.apply(x)
hidden = rnn.apply(linear_input.apply(activation_input))
activation_output = linear_output.apply(hidden)
Ejemplo n.º 45
n_epochs = 30
x_dim = 1
h_dim = 100
o_dim = 10
batch_size = 50

print 'Building model ...'
# T x B x F
x = tensor.tensor3('x', dtype=floatX)
y = tensor.tensor3('y', dtype='int32')

x_to_h1 = Linear(name='x_to_h1',
                 input_dim=x_dim,
                 output_dim=h_dim)
pre_rnn = x_to_h1.apply(x)
rnn = SimpleRecurrent(activation=Rectifier(),
                      dim=h_dim, name="rnn")
h1 = rnn.apply(pre_rnn)
h1_to_o = Linear(name='h1_to_o',
                 input_dim=h_dim,
                 output_dim=o_dim)
pre_softmax = h1_to_o.apply(h1)
softmax = Softmax()
shape = pre_softmax.shape
softmax_out = softmax.apply(pre_softmax.reshape((-1, o_dim)))
softmax_out = softmax_out.reshape(shape)
softmax_out.name = 'softmax_out'

# comparing only last time-step
cost = CategoricalCrossEntropy().apply(y[-1, :, 0], softmax_out[-1])
cost.name = 'CrossEntropy'
error_rate = MisclassificationRate().apply(y[-1, :, 0], softmax_out[-1])
Ejemplo n.º 46
def test_attention_recurrent():
    rng = numpy.random.RandomState(1234)

    dim = 5
    batch_size = 4
    input_length = 20

    attended_dim = 10
    attended_length = 15

    wrapped = SimpleRecurrent(dim, Identity())
    attention = SequenceContentAttention(state_names=wrapped.apply.states,
                                         attended_dim=attended_dim,
                                         match_dim=attended_dim)
    recurrent = AttentionRecurrent(wrapped, attention, seed=1234)
    recurrent.weights_init = IsotropicGaussian(0.5)
    recurrent.biases_init = Constant(0)
    recurrent.initialize()

    attended = tensor.tensor3("attended")
    attended_mask = tensor.matrix("attended_mask")
    inputs = tensor.tensor3("inputs")
    inputs_mask = tensor.matrix("inputs_mask")
    outputs = recurrent.apply(inputs=inputs,
                              mask=inputs_mask,
                              attended=attended,
                              attended_mask=attended_mask)
    states, glimpses, weights = outputs
    assert states.ndim == 3
    assert glimpses.ndim == 3
    assert weights.ndim == 3

    # For values.
    def rand(size):
        return rng.uniform(size=size).astype(floatX)

    # For masks.
    def generate_mask(length, batch_size):
        mask = numpy.ones((length, batch_size), dtype=floatX)
        # To make it look like read data
        for i in range(batch_size):
            mask[1 + rng.randint(0, length - 1):, i] = 0.0
        return mask

    input_vals = rand((input_length, batch_size, dim))
    input_mask_vals = generate_mask(input_length, batch_size)
    attended_vals = rand((attended_length, batch_size, attended_dim))
    attended_mask_vals = generate_mask(attended_length, batch_size)

    func = theano.function([inputs, inputs_mask, attended, attended_mask],
                           [states, glimpses, weights])
    states_vals, glimpses_vals, weight_vals = func(input_vals, input_mask_vals,
                                                   attended_vals,
                                                   attended_mask_vals)
    assert states_vals.shape == (input_length, batch_size, dim)
    assert glimpses_vals.shape == (input_length, batch_size, attended_dim)

    assert (len(ComputationGraph(outputs).shared_variables) == len(
        Selector(recurrent).get_params()))

    # weights for masked positions must be zero
    assert numpy.all(weight_vals * (1 - attended_mask_vals.T) == 0)
    # weights for unmasked positions must be non-zero
    assert numpy.all(abs(weight_vals + (1 - attended_mask_vals.T)) > 1e-5)
    # weights from different steps should be noticeably different
    assert (abs(weight_vals[0] - weight_vals[1])).sum() > 1e-2
    # weights should stay constant for all steps past the end of a sequence
    for i in range(batch_size):
        last = int(input_mask_vals[:, i].sum())
        for j in range(last, input_length):
            assert_allclose(weight_vals[last, i], weight_vals[j, i])

    # freeze sums
    assert_allclose(weight_vals.sum(), input_length * batch_size, 1e-5)
    assert_allclose(states_vals.sum(), 113.429, rtol=1e-5)
    assert_allclose(glimpses_vals.sum(), 415.901, rtol=1e-5)
Ejemplo n.º 47
def test_attention_recurrent():
    rng = numpy.random.RandomState(1234)

    dim = 5
    batch_size = 4
    input_length = 20

    attended_dim = 10
    attended_length = 15

    wrapped = SimpleRecurrent(dim, Identity())
    attention = SequenceContentAttention(
        state_names=wrapped.apply.states,
        attended_dim=attended_dim, match_dim=attended_dim)
    recurrent = AttentionRecurrent(wrapped, attention, seed=1234)
    recurrent.weights_init = IsotropicGaussian(0.5)
    recurrent.biases_init = Constant(0)
    recurrent.initialize()

    attended = tensor.tensor3("attended")
    attended_mask = tensor.matrix("attended_mask")
    inputs = tensor.tensor3("inputs")
    inputs_mask = tensor.matrix("inputs_mask")
    outputs = recurrent.apply(
        inputs=inputs, mask=inputs_mask,
        attended=attended, attended_mask=attended_mask)
    states, glimpses, weights = outputs
    assert states.ndim == 3
    assert glimpses.ndim == 3
    assert weights.ndim == 3

    # For values.
    def rand(size):
        return rng.uniform(size=size).astype(theano.config.floatX)

    # For masks.
    def generate_mask(length, batch_size):
        mask = numpy.ones((length, batch_size), dtype=theano.config.floatX)
        # To make it look like read data
        for i in range(batch_size):
            mask[1 + rng.randint(0, length - 1):, i] = 0.0
        return mask

    input_vals = rand((input_length, batch_size, dim))
    input_mask_vals = generate_mask(input_length, batch_size)
    attended_vals = rand((attended_length, batch_size, attended_dim))
    attended_mask_vals = generate_mask(attended_length, batch_size)

    func = theano.function([inputs, inputs_mask, attended, attended_mask],
                           [states, glimpses, weights])
    states_vals, glimpses_vals, weight_vals = func(
        input_vals, input_mask_vals,
        attended_vals, attended_mask_vals)
    assert states_vals.shape == (input_length, batch_size, dim)
    assert glimpses_vals.shape == (input_length, batch_size, attended_dim)

    assert (len(ComputationGraph(outputs).shared_variables) ==
            len(Selector(recurrent).get_parameters()))

    # Manual reimplementation
    inputs2d = tensor.matrix()
    states2d = tensor.matrix()
    mask1d = tensor.vector()
    weighted_averages = tensor.matrix()
    distribute_func = theano.function(
        [inputs2d, weighted_averages],
        recurrent.distribute.apply(
            inputs=inputs2d,
            weighted_averages=weighted_averages))
    wrapped_apply_func = theano.function(
        [states2d, inputs2d, mask1d], wrapped.apply(
            states=states2d, inputs=inputs2d, mask=mask1d, iterate=False))
    attention_func = theano.function(
        [states2d, attended, attended_mask],
        attention.take_glimpses(
            attended=attended, attended_mask=attended_mask,
            states=states2d))
    states_man = wrapped.initial_states(batch_size).eval()
    glimpses_man = numpy.zeros((batch_size, attended_dim),
                               dtype=theano.config.floatX)
    for i in range(input_length):
        inputs_man = distribute_func(input_vals[i], glimpses_man)
        states_man = wrapped_apply_func(states_man, inputs_man,
                                        input_mask_vals[i])
        glimpses_man, weights_man = attention_func(
            states_man, attended_vals, attended_mask_vals)
        assert_allclose(states_man, states_vals[i], rtol=1e-5)
        assert_allclose(glimpses_man, glimpses_vals[i], rtol=1e-5)
        assert_allclose(weights_man, weight_vals[i], rtol=1e-5)

    # weights for masked positions must be zero
    assert numpy.all(weight_vals * (1 - attended_mask_vals.T) == 0)
    # weights for unmasked positions must be non-zero
    assert numpy.all(abs(weight_vals + (1 - attended_mask_vals.T)) > 1e-5)
    # weights from different steps should be noticeably different
    assert (abs(weight_vals[0] - weight_vals[1])).sum() > 1e-2
    # weights should stay constant for all steps past the end of a sequence
    for i in range(batch_size):
        last = int(input_mask_vals[:, i].sum())
        for j in range(last, input_length):
            assert_allclose(weight_vals[last, i], weight_vals[j, i], 1e-5)
Ejemplo n.º 48
def build_model_hard(vocab_size, args, dtype=floatX):
    logger.info('Building model ...')

    # Parameters for the model
    context = args.context
    state_dim = args.state_dim
    layers = args.layers
    skip_connections = args.skip_connections

    # Symbolic variables
    # In both cases: Time X Batch
    x = tensor.lmatrix('features')
    y = tensor.lmatrix('targets')

    # Build the model
    output_names = []
    output_dims = []
    for d in range(layers):
        if d > 0:
            suffix = '_' + str(d)
        else:
            suffix = ''
        if d == 0 or skip_connections:
            output_names.append("inputs" + suffix)
            output_dims.append(state_dim)

    lookup = LookupTable(length=vocab_size, dim=state_dim)
    lookup.weights_init = initialization.IsotropicGaussian(0.1)
    lookup.biases_init = initialization.Constant(0)

    fork = Fork(output_names=output_names,
                input_dim=args.mini_batch_size,
                output_dims=output_dims,
                prototype=FeedforwardSequence([lookup.apply]))

    transitions = [SimpleRecurrent(dim=state_dim, activation=Tanh())]
    for i in range(layers - 1):
        mlp = MLP(activations=[Logistic()],
                  dims=[2 * state_dim, 1],
                  weights_init=initialization.IsotropicGaussian(0.1),
                  biases_init=initialization.Constant(0),
                  name="mlp_" + str(i))
        transitions.append(
            HardGatedRecurrent(dim=state_dim, mlp=mlp, activation=Tanh()))

    rnn = RecurrentStack(transitions, skip_connections=skip_connections)
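    # With skip connections the stack accepts a separate external input per
    # layer ('inputs', 'inputs_1', ...); otherwise only the first layer is
    # fed (see the kwargs built below).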

    # dim = layers * state_dim
    output_layer = Linear(input_dim=layers * state_dim,
                          output_dim=vocab_size,
                          name="output_layer")

    # Return list of 3D Tensor, one for each layer
    # (Time X Batch X embedding_dim)
    pre_rnn = fork.apply(x)

    # Give a name to the input of each layer
    if skip_connections:
        for t in range(len(pre_rnn)):
            pre_rnn[t].name = "pre_rnn_" + str(t)
    else:
        pre_rnn.name = "pre_rnn"

    # Prepare inputs for the RNN
    kwargs = OrderedDict()
    init_states = {}
    for d in range(layers):
        if d > 0:
            suffix = '_' + str(d)
        else:
            suffix = ''
        if skip_connections:
            kwargs['inputs' + suffix] = pre_rnn[d]
        elif d == 0:
            kwargs['inputs' + suffix] = pre_rnn
        init_states[d] = theano.shared(
            numpy.zeros((args.mini_batch_size, state_dim)).astype(floatX),
            name='state0_%d' % d)
        kwargs['states' + suffix] = init_states[d]

    # Apply the RNN to the inputs
    h = rnn.apply(low_memory=True, **kwargs)

    # Now we have correctly:
    # h = [state_1, state_2, state_3 ...]

    # Save all the last states
    last_states = {}
    for d in range(layers):
        last_states[d] = h[d][-1, :, :]

    # Concatenate all the states
    if layers > 1:
        h = tensor.concatenate(h, axis=2)
    h.name = "hidden_state"

    # The updates of the hidden states
    updates = []
    for d in range(layers):
        updates.append((init_states[d], last_states[d]))

    presoft = output_layer.apply(h[context:, :, :])
    # Define the cost
    # Compute the probability distribution
    time, batch, feat = presoft.shape
    presoft.name = 'presoft'

    cross_entropy = Softmax().categorical_cross_entropy(
        y[context:, :].flatten(), presoft.reshape((batch * time, feat)))
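    # Dividing by log(2) converts the cross-entropy from nats to bits.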
    cross_entropy = cross_entropy / tensor.log(2)
    cross_entropy.name = "cross_entropy"

    # TODO: add regularisation for the cost
    # the log(1) is here in order to differentiate the two variables
    # for monitoring
    cost = cross_entropy + tensor.log(1)
    cost.name = "regularized_cost"

    # Initialize the model
    logger.info('Initializing...')

    fork.initialize()

    rnn.weights_init = initialization.Orthogonal()
    rnn.biases_init = initialization.Constant(0)
    rnn.initialize()

    output_layer.weights_init = initialization.IsotropicGaussian(0.1)
    output_layer.biases_init = initialization.Constant(0)
    output_layer.initialize()

    return cost, cross_entropy, updates
Ejemplo n.º 49
# Parameters
n_u = 225  # input vector size (not time at this point)
n_y = 225  # output vector size
n_h = 500  # number of hidden units

iteration = 300  # number of epochs of gradient descent

print "Building Model"
# Symbolic variables
x = tensor.tensor3('x', dtype=floatX)
target = tensor.tensor3('target', dtype=floatX)

# Build the model
linear = Linear(input_dim=n_u, output_dim=n_h, name="first_layer")
rnn = SimpleRecurrent(dim=n_h, activation=Tanh())
linear2 = Linear(input_dim=n_h, output_dim=n_y, name="output_layer")
sigm = Sigmoid()

x_transform = linear.apply(x)
h = rnn.apply(x_transform)
predict = sigm.apply(linear2.apply(h))


# only for generation B x h_dim
h_initial = tensor.tensor3('h_initial', dtype=floatX)
h_testing = rnn.apply(x_transform, h_initial, iterate=False)
y_hat_testing = linear2.apply(h_testing)
y_hat_testing = sigm.apply(y_hat_testing)
y_hat_testing.name = 'y_hat_testing'
Ejemplo n.º 50
print('Building model ...')
# T x B x F
x = tensor.tensor3('x', dtype=floatX)
# T x B
x_mask = tensor.matrix('x_mask', dtype=floatX)
# L x B
y = tensor.matrix('y', dtype=floatX)
# L x B
y_mask = tensor.matrix('y_mask', dtype=floatX)

x_to_h = Linear(name='x_to_h',
                input_dim=x_dim,
                output_dim=h_dim)
x_transform = x_to_h.apply(x)
rnn = SimpleRecurrent(activation=Tanh(),
                      dim=h_dim, name="rnn")
h = rnn.apply(x_transform)
h_to_o = Linear(name='h_to_o',
                input_dim=h_dim,
                output_dim=num_classes + 1)
h_transform = h_to_o.apply(h)
# T x B x C+1
y_hat = tensor.nnet.softmax(
    h_transform.reshape((-1, num_classes + 1))
).reshape((h.shape[0], h.shape[1], -1))
y_hat.name = 'y_hat'

y_hat_mask = x_mask
cost = CTC().apply(y, y_hat, y_mask, y_hat_mask, 'normal_scale')
cost.name = 'CTC'
# Initialization
Ejemplo n.º 51
    def __init__(self, input_sources_list, input_sources_vocab_size_list,
                 output_source, output_source_vocab_size,
                 lookup_dim=200, hidden_size=256, recurrent_stack_size=1):

        self.InputSources = input_sources_list
        self.InputSourcesVocab = input_sources_vocab_size_list
        self.OutputSource = output_source
        self.OutputSourceVocab = output_source_vocab_size

        inputs = [tensor.lmatrix(source) for source in input_sources_list]
        output = tensor.lmatrix(output_source)

        lookups = self.get_lookups(lookup_dim, input_sources_vocab_size_list)

        for lookup in lookups:
            lookup.initialize()

        merge = Merge([lookup.name for lookup in lookups],
                      [lookup.dim for lookup in lookups], hidden_size,
                      weights_init=initialization.Uniform(width=0.01),
                      biases_init=Constant(0))
        merge.initialize()

        linear0 = Linear(input_dim=hidden_size, output_dim=hidden_size,
                         weights_init=initialization.Uniform(width=0.01),
                         biases_init=Constant(0), name='linear0')
        linear0.initialize()

        recurrent_blocks = []

        for i in range(recurrent_stack_size):
            recurrent_blocks.append(SimpleRecurrent(
                dim=hidden_size, activation=Tanh(),
                weights_init=initialization.Uniform(width=0.01),
                use_bias=False))

        for i, recurrent_block in enumerate(recurrent_blocks):
            recurrent_block.name = 'recurrent'+str(i+1)
            recurrent_block.initialize()

        linear_out = Linear(input_dim=hidden_size,
                            output_dim=output_source_vocab_size,
                            weights_init=initialization.Uniform(width=0.01),
                            biases_init=Constant(0), name='linear_out')
        linear_out.initialize()
        softmax = NDimensionalSoftmax(name='softmax')

        lookup_outputs = [lookup.apply(input) for lookup, input in zip(lookups, inputs)]

        m = merge.apply(*lookup_outputs)
        r = linear0.apply(m)
        for block in recurrent_blocks:
            r = block.apply(r)
        a = linear_out.apply(r)

        self.Cost = softmax.categorical_cross_entropy(output, a, extra_ndim=1).mean()
        self.Cost.name = 'cost'

        y_hat = softmax.apply(a, extra_ndim=1)
        y_hat.name = 'y_hat'

        self.ComputationGraph = ComputationGraph(self.Cost)

        self.Function = None
        self.MainLoop = None
        self.Model = Model(y_hat)
Ejemplo n.º 52
def main(save_to, num_epochs):
    batch_size = 128
    dim = 100
    n_steps = 20
    i2h1 = MLP([Identity()], [784, dim], biases_init=Constant(0.),
               weights_init=IsotropicGaussian(.001))
    h2o1 = MLP([Rectifier(), Logistic()], [dim, dim, 784],
               biases_init=Constant(0.), weights_init=IsotropicGaussian(.001))
    rec1 = SimpleRecurrent(dim=dim, activation=Tanh(), weights_init=Orthogonal())
    i2h1.initialize()
    h2o1.initialize()
    rec1.initialize()

    x = tensor.tensor3('features')
    x1 = x[1:, :, :]
    x2 = x[:-1, :, :]
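    # The RNN reads frames x[1:] and is trained to reconstruct the preceding
    # frames x[:-1] with a per-pixel binary cross-entropy.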

    preproc = i2h1.apply(x1)
    h1 = rec1.apply(preproc)
    x_hat = h2o1.apply(h1)
    cost = tensor.nnet.binary_crossentropy(x_hat, x2).mean()
    # cost = CategoricalCrossEntropy().apply(y.flatten(), probs)
    cost.name = 'final_cost'

    cg = ComputationGraph([cost, ])

    mnist_train = MNIST("train", subset=slice(0, 50000), sources=('features', ))
    mnist_valid = MNIST("train", subset=slice(50000, 60000), sources=('features',))
    mnist_test = MNIST("test")
    trainstream = Mapping(Flatten(DataStream(mnist_train,
                          iteration_scheme=SequentialScheme(50000, batch_size))),
                          _meanize(n_steps))
    validstream = Mapping(Flatten(DataStream(mnist_valid,
                                             iteration_scheme=SequentialScheme(10000,
                                                                               batch_size))),
                          _meanize(n_steps))
    teststream = Mapping(Flatten(DataStream(mnist_test,
                                            iteration_scheme=SequentialScheme(10000,
                                                                              batch_size))),
                         _meanize(n_steps))

    algorithm = GradientDescent(
        cost=cost, params=cg.parameters,
        step_rule=CompositeRule([Adam(), StepClipping(100)]))
    main_loop = MainLoop(
        algorithm,
        trainstream,
        extensions=[Timing(),
                    FinishAfter(after_n_epochs=num_epochs),
                    # DataStreamMonitoring(
                    #     [cost, ],
                    #     teststream,
                    #     prefix="test"),
                    DataStreamMonitoringAndSaving(
                        [cost, ],
                        validstream,
                        [i2h1, h2o1, rec1],
                        'best_' + save_to + '.pkl',
                        cost_name=cost.name,
                        after_epoch=True,
                        prefix='valid'),
                    TrainingDataMonitoring(
                        [cost,
                         aggregation.mean(algorithm.total_gradient_norm)],
                        prefix="train",
                        after_epoch=True),
                    # Plot(
                    #     save_to,
                    #     channels=[
                    #         ['test_final_cost',
                    #          'test_misclassificationrate_apply_error_rate'],
                    #         ['train_total_gradient_norm']]),
                    Printing()])
    main_loop.run()
Ejemplo n.º 53
def build_model_soft(args, dtype=floatX):
    logger.info('Building model ...')

    # Return list of 3D Tensor, one for each layer
    # (Time X Batch X embedding_dim)
    pre_rnn, x_mask = get_prernn(args)

    transitions = [SimpleRecurrent(dim=args.state_dim, activation=Tanh())]

    # Build the MLP
    dims = [2 * args.state_dim]
    activations = []
    for i in range(args.mlp_layers):
        activations.append(Rectifier())
        dims.append(args.state_dim)

    # Activation of the last layer of the MLP
    if args.mlp_activation == "logistic":
        activations.append(Logistic())
    elif args.mlp_activation == "rectifier":
        activations.append(Rectifier())
    elif args.mlp_activation == "hard_logistic":
        activations.append(HardLogistic())
    else:
        assert False

    # Output of MLP has dimension 1
    dims.append(1)

    for i in range(args.layers - 1):
        mlp = MLP(activations=activations, dims=dims,
                  weights_init=initialization.IsotropicGaussian(0.1),
                  biases_init=initialization.Constant(0),
                  name="mlp_" + str(i))
        transitions.append(
            SoftGatedRecurrent(dim=args.state_dim,
                               mlp=mlp,
                               activation=Tanh()))

    rnn = RecurrentStack(transitions, skip_connections=args.skip_connections)
    initialize_rnn(rnn, args)

    # Prepare inputs and initial states for the RNN
    kwargs, inits = get_rnn_kwargs(pre_rnn, args)

    # Apply the RNN to the inputs
    h = rnn.apply(low_memory=True, mask=x_mask, **kwargs)

    # Now we have:
    # h = [state, state_1, gate_value_1, state_2, gate_value_2, state_3, ...]

    # Extract gate_values
    gate_values = h[2::2]
    new_h = [h[0]]
    new_h.extend(h[1::2])
    h = new_h

    # Now we have:
    # h = [state, state_1, state_2, ...]
    # gate_values = [gate_value_1, gate_value_2, gate_value_3]

    for i, gate_value in enumerate(gate_values):
        gate_value.name = "gate_value_" + str(i)

    # Save all the last states
    last_states = {}
    hidden_states = []
    for d in range(args.layers):
        h[d] = h[d] * x_mask
        last_states[d] = h[d][-1, :, :]
        h[d].name = "hidden_state_" + str(d)
        hidden_states.append(h[d])

    # Concatenate all the states
    if args.layers > 1:
        h = tensor.concatenate(h, axis=2)
    h.name = "hidden_state_all"

    # The updates of the hidden states
    updates = []
    for d in range(args.layers):
        updates.append((inits[0][d], last_states[d]))

    presoft = get_presoft(h, args)

    cost, cross_entropy = get_costs(presoft, args)

    return cost, cross_entropy, updates, gate_values, hidden_states