Example #1
    def __init__(self, dimension, alphabet_size, **kwargs):
        super(WordReverser, self).__init__(**kwargs)
        encoder = Bidirectional(
            SimpleRecurrent(dim=dimension, activation=Tanh()))
        fork = Fork([name for name in encoder.prototype.apply.sequences
                    if name != 'mask'])
        fork.input_dim = dimension
        fork.output_dims = [dimension for name in fork.input_names]
        lookup = LookupTable(alphabet_size, dimension)
        transition = SimpleRecurrent(
            activation=Tanh(),
            dim=dimension, name="transition")
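        # attended_dim is 2 * dimension because the bidirectional encoder
        # concatenates the forward and backward hidden states.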
        attention = SequenceContentAttention(
            state_names=transition.apply.states,
            attended_dim=2 * dimension, match_dim=dimension, name="attention")
        readout = Readout(
            readout_dim=alphabet_size,
            source_names=[transition.apply.states[0],
                          attention.take_glimpses.outputs[0]],
            emitter=SoftmaxEmitter(name="emitter"),
            feedback_brick=LookupFeedback(alphabet_size, dimension),
            name="readout")
        generator = SequenceGenerator(
            readout=readout, transition=transition, attention=attention,
            name="generator")

        self.lookup = lookup
        self.fork = fork
        self.encoder = encoder
        self.generator = generator
        self.children = [lookup, fork, encoder, generator]
Example #2
    def __init__(self,
                 num_input_words,
                 emb_dim,
                 dim,
                 vocab,
                 lookup=None,
                 fork_and_rnn=None,
                 **kwargs):

        if num_input_words > 0:
            logger.info("Restricting def vocab to " + str(num_input_words))
            self._num_input_words = num_input_words
        else:
            self._num_input_words = vocab.size()

        self._vocab = vocab

        children = []

        if lookup is None:
            self._def_lookup = LookupTable(self._num_input_words,
                                           emb_dim,
                                           name='def_lookup')
        else:
            self._def_lookup = lookup

        if fork_and_rnn is None:
            self._def_fork = Linear(emb_dim, 4 * dim, name='def_fork')
            self._def_rnn = LSTM(dim, name='def_rnn')
        else:
            self._def_fork, self._def_rnn = fork_and_rnn

        children.extend([self._def_lookup, self._def_fork, self._def_rnn])

        super(LSTMReadDefinitions, self).__init__(children=children, **kwargs)
Example #3
    def __init__(self, emb_dim, dim, num_input_words, 
                 num_output_words, vocab, 
                 **kwargs):
        if emb_dim == 0:
            emb_dim = dim
        if num_input_words == 0:
            num_input_words = vocab.size()
        if num_output_words == 0:
            num_output_words = vocab.size()

        self._num_input_words = num_input_words
        self._num_output_words = num_output_words
        self._vocab = vocab

        self._word_to_id = WordToIdOp(self._vocab)

        children = []

        self._main_lookup = LookupTable(self._num_input_words, emb_dim, name='main_lookup')
        self._encoder_fork = Linear(emb_dim, 4 * dim, name='encoder_fork')
        self._encoder_rnn = LSTM(dim, name='encoder_rnn')
        self._decoder_fork = Linear(emb_dim, 4 * dim, name='decoder_fork')
        self._decoder_rnn = LSTM(dim, name='decoder_rnn')
        children.extend([self._main_lookup,
                         self._encoder_fork, self._encoder_rnn,
                         self._decoder_fork, self._decoder_rnn])
        self._pre_softmax = Linear(dim, self._num_output_words)
        self._softmax = NDimensionalSoftmax()
        children.extend([self._pre_softmax, self._softmax])

        super(LanguageModel, self).__init__(children=children, **kwargs)
Example #4
    def __init__(self, vocab_size, embedding_dim, dgru_state_dim, dgru_depth,
                 **kwargs):
        super(Decimator, self).__init__(**kwargs)

        self.vocab_size = vocab_size
        self.embedding_dim = embedding_dim
        self.dgru_state_dim = dgru_state_dim
        self.lookup = LookupTable(name='embeddings')
        self.dgru_depth = dgru_depth
        # representation
        self.dgru = RecurrentStack([
            DGRU(activation=Tanh(), dim=self.dgru_state_dim)
            for _ in range(dgru_depth)
        ],
                                   skip_connections=True)
        # importance of this representation
        self.bidir_w = Bidirectional(RecurrentWithFork(
            DGRU(activation=Tanh(), dim=self.dgru_state_dim // 2),
            self.embedding_dim,
            name='src_word_with_fork'),
                                     name='bidir_src_word_encoder')

        self.gru_fork = Fork(
            [name for name in self.dgru.apply.sequences if name != 'mask'],
            prototype=Linear(),
            name='gru_fork')
        # map to a energy scalar
        self.wl = Linear(input_dim=dgru_state_dim, output_dim=1)

        self.children = [
            self.lookup, self.dgru, self.gru_fork, self.bidir_w, self.wl
        ]
Example #5
    def __init__(self, dimension, alphabet_size, **kwargs):
        super(SimpleGenerator, self).__init__(**kwargs)
        lookup = LookupTable(alphabet_size, dimension)
        transition = SimpleRecurrent(activation=Tanh(),
                                     dim=dimension,
                                     name="transition")
        attention = SequenceContentAttention(
            state_names=transition.apply.states,
            attended_dim=dimension,
            match_dim=dimension,
            name="attention")
        readout = Readout(readout_dim=alphabet_size,
                          source_names=[
                              transition.apply.states[0],
                              attention.take_glimpses.outputs[0]
                          ],
                          emitter=SoftmaxEmitter(name="emitter"),
                          feedback_brick=LookupFeedback(
                              alphabet_size, dimension),
                          name="readout")
        generator = SequenceGenerator(readout=readout,
                                      transition=transition,
                                      attention=attention,
                                      name="generator")

        self.lookup = lookup
        self.generator = generator
        self.children = [lookup, generator]
Example #6
    def __init__(self, num_outputs=None, feedback_dim=None, **kwargs):
        super(LookupFeedback, self).__init__(**kwargs)
        update_instance(self, locals())

        self.lookup = LookupTable(num_outputs, feedback_dim,
                                  weights_init=self.weights_init)
        self.children = [self.lookup]
Example #7
def test_lookup_table():
    lt = LookupTable(5, 3)
    lt.allocate()

    lt.W.set_value(numpy.arange(15).reshape(5, 3).astype(theano.config.floatX))

    x = tensor.lmatrix("x")
    y = lt.apply(x)
    f = theano.function([x], [y])

    x_val = [[1, 2], [0, 3]]
    desired = numpy.array([[[3, 4, 5], [6, 7, 8]], [[0, 1, 2], [9, 10, 11]]],
                          dtype=theano.config.floatX)
    assert_equal(f(x_val)[0], desired)

    # Test get_dim
    assert_equal(lt.get_dim(lt.apply.inputs[0]), 0)
    assert_equal(lt.get_dim(lt.apply.outputs[0]), lt.dim)
    assert_raises(ValueError, lt.get_dim, 'random_name')

    # Test feedforward interface
    assert lt.input_dim == 0
    assert lt.output_dim == 3
    lt.output_dim = 4
    assert lt.output_dim == 4

    def assign_input_dim():
        lt.input_dim = 11

    assert_raises(ValueError, assign_input_dim)
    lt.input_dim = 0
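A minimal usage sketch (hypothetical, not drawn from any of the listed projects) that assumes the standard Blocks API: instead of writing lt.W directly as in the test above, let the brick draw its own weights via weights_init, as Examples #19 and #26 do.

import numpy
import theano
from theano import tensor

from blocks.bricks.lookup import LookupTable
from blocks.initialization import IsotropicGaussian

# Build a small lookup table and let Blocks initialize its weights.
lt = LookupTable(length=5, dim=3, weights_init=IsotropicGaussian(0.01))
lt.initialize()

# Embed a batch of index sequences; the output gains a trailing `dim` axis.
x = tensor.lmatrix('x')
embed = theano.function([x], lt.apply(x))
print(embed(numpy.array([[1, 2], [0, 3]], dtype='int64')).shape)  # (2, 2, 3)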
Example #8
    def __init__(
            self,
            encoder_type,
            num_characters,
            input_dim,
            encoder_dim,
            **kwargs):
        assert encoder_type in [None, 'bidirectional']
        self.encoder_type = encoder_type
        super(Encoder, self).__init__(**kwargs)

        self.children = []

        if encoder_type in ['lookup', 'bidirectional']:
            self.embed_label = LookupTable(
                num_characters,
                input_dim,
                name='embed_label')
            self.children += [
                self.embed_label]
        else:
            # If there is no encoder.
            assert num_characters == input_dim

        if encoder_type == 'bidirectional':
            transition = RecurrentWithFork(
                GatedRecurrent(dim=encoder_dim).apply,
                input_dim, name='encoder_transition')
            self.encoder = Bidirectional(transition, name='encoder')
            self.children.append(self.encoder)
Example #9
    def __init__(self, num_outputs=None, feedback_dim=None, **kwargs):
        self.num_outputs = num_outputs
        self.feedback_dim = feedback_dim

        self.lookup = LookupTable(num_outputs, feedback_dim)
        children = [self.lookup] + kwargs.get('children', [])
        super(LookupFeedback, self).__init__(children=children, **kwargs)
Example #10
    def __init__(self, x, y, vocab_size, hidden_size, num_layers, pretrained_embeds=None):
        """
        Implements a neural language model using an LSTM.
        Word y_n+1 ~ Softmax(U * h_n)
        :param x A minibatch: each row is an instance (a sequence),
            with batch_size rows
        :param y x shifted by 1, which are the target words to predict
            for the language modeling objective based on the hidden LSTM
            state
        :param vocab_size The number of types in the training data
        :param hidden_size The dimensionality of the word embeddings
        :param num_layers The number of stacked LSTM layers
        :param pretrained_embeds Pretrained embeddings for initialization as an ND array
        """
        self.vocab_size = vocab_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        # Initialize the word embedding table.  If we have pretrained embeddings, we use those
        self.word_embedding_lookup = LookupTable(length=vocab_size, dim=hidden_size, name="word_embeddings")
        if pretrained_embeds is None:
            initialize(self.word_embedding_lookup, 0.8)
        else:
            assert pretrained_embeds.shape[0] == vocab_size and pretrained_embeds.shape[1] == hidden_size
            self.word_embedding_lookup.weights_init = Constant(pretrained_embeds)
            self.word_embedding_lookup.biases_init = Constant(0)
            self.word_embedding_lookup.initialize()

        self.word_embeddings = self.word_embedding_lookup.W

        self.y_hat, self.cost, self.cells = self.nn_fprop(x, y, num_layers)
Example #11
    def __init__(self, vocab_size, embedding_dim, state_dim, **kwargs):
        super(BidirectionalEncoder, self).__init__(**kwargs)
        self.vocab_size = vocab_size
        self.embedding_dim = embedding_dim
        self.state_dim = state_dim

        self.lookup = LookupTable(name='embeddings')
        self.bidir = BidirectionalWMT15(
            GatedRecurrent(activation=Tanh(), dim=state_dim))
        self.fwd_fork = Fork([
            name
            for name in self.bidir.prototype.apply.sequences if name != 'mask'
        ],
                             prototype=Linear(),
                             name='fwd_fork')
        self.back_fork = Fork([
            name
            for name in self.bidir.prototype.apply.sequences if name != 'mask'
        ],
                              prototype=Linear(),
                              name='back_fork')

        self.children = [
            self.lookup, self.bidir, self.fwd_fork, self.back_fork
        ]
Example #12
    def __init__(self, morpho_idxs, masks, word_idxs, morpho_vocab_size,
                 hidden_size, word_embeds):
        """
        Implements a morpheme-level prior by computing the sum of KL-Div
        of the elements of the morpheme embeddings and the word embeddings
        (where these elements are in [0,1] and are taken as Bernoulli dists).
        :param morpho_idxs A 3D tensor of batch_size x seq_length x max_morphemes_per_word
            Where the 3rd dimension is morpheme indices, padded with 0's so all words have
            the same morpheme decomposition length
        :param masks A 4D tensor of bits which select which values in morpho_idxs are
            padding and which are actual morphemes.  4D is needed for broadcasting
        :param word_idxs A 2D matrix of batch_size x seq_length of word indices
        :param morpho_vocab_size the number of morpheme types seen in training data
        :param hidden_size the dimensionality of morpheme / word embeddings
        :param word_embeds the unconstrained word embeddings from the language model
        """
        self.morpho_vocab_size = morpho_vocab_size
        self.hidden_size = hidden_size
        self.word_embed_lookup = word_embeds  # These are the unconstrained word embeddings

        self.morpho_embed_lookup = LookupTable(length=morpho_vocab_size,
                                               dim=hidden_size,
                                               name="morpho_embeddings")
        initialize(self.morpho_embed_lookup, 0.8)

        self.cost = self.compute_cost(morpho_idxs, masks, word_idxs)
        self.cost.name = "morpho_cost"

        self.norm = self.morpho_embed_lookup.W.norm(2)
        self.norm.name = "morpho_embed_norm"
Example #13
 def __init__(self,
              vocab_size,
              embedding_dim,
              igru_state_dim,
              emitter=None,
              feedback_brick=None,
              merge=None,
              merge_prototype=None,
              post_merge=None,
              merged_dim=None,
              igru=None,
              **kwargs):
     self.igru = igru
     self.lookup = LookupTable(name='embeddings')
     self.vocab_size = vocab_size
     self.igru_state_dim = igru_state_dim
     self.gru_to_softmax = Linear(input_dim=igru_state_dim,
                                  output_dim=vocab_size)
     self.embedding_dim = embedding_dim
     self.gru_fork = Fork([
         name for name in self.igru.apply.sequences
         if name != 'mask' and name != 'input_states'
     ],
                          prototype=Linear(),
                          name='gru_fork')
     kwargs['children'] = [
         self.igru, self.lookup, self.gru_to_softmax, self.gru_fork
     ]
     super(Interpolator, self).__init__(emitter=emitter,
                                        feedback_brick=feedback_brick,
                                        merge=merge,
                                        merge_prototype=merge_prototype,
                                        post_merge=post_merge,
                                        merged_dim=merged_dim,
                                        **kwargs)
Example #14
	def __init__(self, dimen, vocab_size): #{
		# Initialize the base brick so that allocation bookkeeping is set up.
		super(MorphGen, self).__init__()

		# The encoder 
		encoder = Bidirectional(SimpleRecurrent(dim=dimen, activation=Tanh()))

		# Fork the encoder's sequence inputs (everything except the mask).
		fork = Fork([name for name in encoder.prototype.apply.sequences if name != 'mask'])
		fork.input_dim = dimen
		fork.output_dims = [encoder.prototype.get_dim(name) for name in fork.input_names]

		lookup = LookupTable(vocab_size, dimen)

		transition = SimpleRecurrent(dim=dimen, activation=Tanh(), name="transition")

		atten = SequenceContentAttention(state_names=transition.apply.states,attended_dim=2*dimen, match_dim=dimen, name="attention")

		readout = Readout(
			readout_dim=vocab_size,
			source_names=[transition.apply.states[0],
			atten.take_glimpses.outputs[0]],
			emitter=SoftmaxEmitter(name="emitter"),
			feedback_brick=LookupFeedback(vocab_size, dimen),
			name="readout")

		generator = SequenceGenerator(readout=readout, transition=transition, attention=atten,name="generator")
	
		self.lookup = lookup
		self.fork = fork
		self.encoder = encoder
		self.generator = generator
		self.children = [lookup, fork, encoder, generator]
Example #15
    def __init__(self, num_outputs=None, feedback_dim=None, **kwargs):
        self.num_outputs = num_outputs
        self.feedback_dim = feedback_dim

        self.lookup = LookupTable(num_outputs, feedback_dim)
        children = [self.lookup]
        kwargs.setdefault('children', []).extend(children)
        super(LookupFeedback, self).__init__(**kwargs)
Example #16
    def __init__(self, num_outputs=None, feedback_dim=None, **kwargs):
        super(LookupFeedback, self).__init__(**kwargs)
        self.num_outputs = num_outputs
        self.feedback_dim = feedback_dim

        self.lookup = LookupTable(num_outputs, feedback_dim,
                                  weights_init=self.weights_init)
        self.children = [self.lookup]
Example #17
    def __init__(self,
                 vocab_size,
                 embedding_dim,
                 igru_state_dim,
                 igru_depth,
                 trg_dgru_depth,
                 emitter,
                 feedback_brick,
                 merge=None,
                 merge_prototype=None,
                 post_merge=None,
                 **kwargs):
        merged_dim = igru_state_dim
        if not merge:
            merge = Merge(input_names=kwargs['source_names'],
                          prototype=merge_prototype)
        if not post_merge:
            post_merge = Bias(dim=merged_dim)

        # For compatibility: use a single IGRU when depth is 1, a RecurrentStack otherwise.
        if igru_depth == 1:
            self.igru = IGRU(dim=igru_state_dim)
        else:
            self.igru = RecurrentStack(
                [IGRU(dim=igru_state_dim, name='igru')] + [
                    UpperIGRU(dim=igru_state_dim,
                              activation=Tanh(),
                              name='upper_igru' + str(i))
                    for i in range(1, igru_depth)
                ],
                skip_connections=True)
        self.embedding_dim = embedding_dim
        self.emitter = emitter
        self.feedback_brick = feedback_brick
        self.merge = merge
        self.post_merge = post_merge
        self.merged_dim = merged_dim
        self.igru_depth = igru_depth
        self.trg_dgru_depth = trg_dgru_depth
        self.lookup = LookupTable(name='embeddings')
        self.vocab_size = vocab_size
        self.igru_state_dim = igru_state_dim
        self.gru_to_softmax = Linear(input_dim=igru_state_dim,
                                     output_dim=vocab_size)
        self.gru_fork = Fork([
            name for name in self.igru.apply.sequences
            if name != 'mask' and name != 'input_states'
        ],
                             prototype=Linear(),
                             name='gru_fork')

        children = [
            self.emitter, self.feedback_brick, self.merge, self.post_merge,
            self.igru, self.lookup, self.gru_to_softmax, self.gru_fork
        ]
        kwargs.setdefault('children', []).extend(children)
        super(Interpolator, self).__init__(**kwargs)
Example #18
    def __init__(self, dim, **kwargs):
        super(LookupBottom, self).__init__(**kwargs)
        self.dim = dim

        self.mask = tensor.matrix('inputs_mask')
        self.batch_inputs = {'inputs': tensor.lmatrix('inputs')}
        self.single_inputs = {'inputs': tensor.lvector('inputs')}

        self.children = [LookupTable(self.input_num_chars['inputs'], self.dim)]
Example #19
    def create_model(self):
        input_dim = self.input_dim
        x = self.x
        y = self.y
        p = self.p
        mask = self.mask
        hidden_dim = self.hidden_dim
        embedding_dim = self.embedding_dim
        lookup = LookupTable(self.dict_size,
                             embedding_dim,
                             weights_init=IsotropicGaussian(0.001),
                             name='LookupTable')
        x_to_h = Linear(embedding_dim,
                        hidden_dim * 4,
                        name='x_to_h',
                        weights_init=IsotropicGaussian(0.001),
                        biases_init=Constant(0.0))
        lstm = LSTM(hidden_dim,
                    name='lstm',
                    weights_init=IsotropicGaussian(0.001),
                    biases_init=Constant(0.0))
        h_to_o = MLP([Logistic()], [hidden_dim, 1],
                     weights_init=IsotropicGaussian(0.001),
                     biases_init=Constant(0),
                     name='h_to_o')

        lookup.initialize()
        x_to_h.initialize()
        lstm.initialize()
        h_to_o.initialize()

        embed = lookup.apply(x).reshape(
            (x.shape[0], x.shape[1], self.embedding_dim))
        embed.name = "embed_vec"
        x_transform = x_to_h.apply(embed.transpose(1, 0, 2))
        x_transform.name = "Transformed X"
        self.lookup = lookup
        self.x_to_h = x_to_h
        self.lstm = lstm
        self.h_to_o = h_to_o

        # if mask is None:
        h, c = lstm.apply(x_transform)
        # else:
        #     h, c = lstm.apply(x_transform, mask=mask)
        h.name = "hidden_state"
        c.name = "cell_state"
        # only values of hidden units of the last timeframe are used for
        # the classification
        indices = T.sum(mask, axis=0) - 1
        rel_hid = h[indices, T.arange(h.shape[1])]
        out = self.h_to_o.apply(rel_hid)

        probs = out
        return probs
Example #20
 def __init__(self, config, **kwargs):
     super(ContextEmbedder, self).__init__(**kwargs)
     self.dim_embeddings = config.dim_embeddings
     self.embed_weights_init = config.embed_weights_init
     self.inputs = [name for (name, _, _) in self.dim_embeddings]
     self.outputs = ['%s_embedded' % name for name in self.inputs]
     self.lookups = {
         name: LookupTable(name='%s_lookup' % name)
         for name in self.inputs
     }
     self.children = self.lookups.values()
Example #21
def nn_fprop(x, y, vocab_size, hidden_size, num_layers, model):
    lookup = LookupTable(length=vocab_size, dim=hidden_size)
    initialize([lookup])
    h = lookup.apply(x)
    for i in range(num_layers):
        if model == 'rnn':
            h = rnn_layer(hidden_size, h, i)
        if model == 'gru':
            h = gru_layer(hidden_size, h, i)
        if model == 'lstm':
            h = lstm_layer(hidden_size, h, i)
    return softmax_layer(h, y, vocab_size, hidden_size)
Example #22
 def __init__(self, vocab_size, topical_embedding_dim, state_dim, word_num,
              batch_size, **kwargs):
     super(topicalq_transformer, self).__init__(**kwargs)
     self.vocab_size = vocab_size
     self.word_embedding_dim = topical_embedding_dim
     self.state_dim = state_dim
     self.word_num = word_num
     self.batch_size = batch_size
     self.look_up = LookupTable(name='topical_embeddings')
     self.transformer = MLP(activations=[Tanh()],
                            dims=[self.word_embedding_dim * self.word_num,
                                  self.state_dim],
                            name='topical_transformer')
     self.children = [self.look_up, self.transformer]
Example #23
def test_lookup_table():
    lt = LookupTable(5, 3)
    lt.allocate()

    lt.W.set_value(numpy.arange(15).reshape(5, 3).astype(theano.config.floatX))

    x = tensor.lmatrix("x")
    y = lt.apply(x)
    f = theano.function([x], [y])

    x_val = [[1, 2], [0, 3]]
    desired = numpy.array([[[3, 4, 5], [6, 7, 8]], [[0, 1, 2], [9, 10, 11]]],
                          dtype=theano.config.floatX)
    assert_equal(f(x_val)[0], desired)
Example #24
    def __init__(self, dimension, input_size, embed_input=False, **kwargs):
        super(LSTMEncoder, self).__init__(**kwargs)
        if embed_input:
            self.embedder = LookupTable(input_size, dimension)
        else:
            self.embedder = Linear(input_size, dimension)
        self.fork = Fork(['inputs'],
                         dimension,
                         output_dims=[dimension],
                         prototype=Linear(dimension, 4 * dimension))
        encoder = Bidirectional(LSTM(dim=dimension, activation=Tanh()))

        self.encoder = encoder
        self.children = [encoder, self.embedder, self.fork]
Example #25
 def __init__(self, vocab_size, embedding_dim, n_layers, skip_connections,
              state_dim, **kwargs):
     """Sole constructor.
     
     Args:
         vocab_size (int): Source vocabulary size
         embedding_dim (int): Dimension of the embedding layer
         n_layers (int): Number of layers. Layers share the same
                         weight matrices.
         skip_connections (bool): Skip connections connect the
                                  source word embeddings directly 
                                  with deeper layers to propagate 
                                  the gradient more efficiently
         state_dim (int): Number of hidden units in the recurrent
                          layers.
     """
     super(DeepBidirectionalEncoder, self).__init__(**kwargs)
     self.vocab_size = vocab_size
     self.embedding_dim = embedding_dim
     self.n_layers = n_layers
     self.state_dim = state_dim
     self.skip_connections = skip_connections
     self.lookup = LookupTable(name='embeddings')
     self.bidirs = []
     self.fwd_forks = []
     self.back_forks = []
     for i in xrange(self.n_layers):
         bidir = BidirectionalWMT15(GatedRecurrent(activation=Tanh(),
                                                   dim=state_dim),
                                    name='bidir%d' % i)
         self.bidirs.append(bidir)
         self.fwd_forks.append(
             Fork([
                 name for name in bidir.prototype.apply.sequences
                 if name != 'mask'
             ],
                  prototype=Linear(),
                  name='fwd_fork%d' % i))
         self.back_forks.append(
             Fork([
                 name for name in bidir.prototype.apply.sequences
                 if name != 'mask'
             ],
                  prototype=Linear(),
                  name='back_fork%d' % i))
     self.children = [self.lookup] \
                     + self.bidirs \
                     + self.fwd_forks \
                     + self.back_forks
Example #26
def create_rnn(hidden_dim, vocab_dim, mode="rnn"):
    # input
    x = tensor.imatrix('inchar')
    y = tensor.imatrix('outchar')

    #
    W = LookupTable(
        name="W1",
        #dim = hidden_dim*4,
        dim=hidden_dim,
        length=vocab_dim,
        weights_init=initialization.IsotropicGaussian(0.01),
        biases_init=initialization.Constant(0))
    if mode == "lstm":
        # Long Short Term Memory
        H = LSTM(hidden_dim,
                 name='H',
                 weights_init=initialization.IsotropicGaussian(0.01),
                 biases_init=initialization.Constant(0.0))
    else:
        # recurrent history weight
        H = SimpleRecurrent(
            name="H",
            dim=hidden_dim,
            activation=Tanh(),
            weights_init=initialization.IsotropicGaussian(0.01))
    #
    S = Linear(name="W2",
               input_dim=hidden_dim,
               output_dim=vocab_dim,
               weights_init=initialization.IsotropicGaussian(0.01),
               biases_init=initialization.Constant(0))

    A = NDimensionalSoftmax(name="softmax")

    initLayers([W, H, S])
    activations = W.apply(x)
    hiddens = H.apply(activations)  #[0]
    activations2 = S.apply(hiddens)
    y_hat = A.apply(activations2, extra_ndim=1)
    cost = A.categorical_cross_entropy(y, activations2, extra_ndim=1).mean()

    cg = ComputationGraph(cost)
    #print VariableFilter(roles=[WEIGHT])(cg.variables)
    #W1,H,W2 = VariableFilter(roles=[WEIGHT])(cg.variables)

    layers = (x, W, H, S, A, y)

    return cg, layers, y_hat, cost
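A hypothetical follow-up sketch (the dimensions and the variable unpacking below are assumptions, not part of the example): y_hat depends only on the input characters, so it can be compiled into a prediction function directly.

import theano

cg, layers, y_hat, cost = create_rnn(hidden_dim=256, vocab_dim=128, mode="lstm")
x, W, H, S, A, y = layers
# Per-position distribution over the vocabulary for each input character.
predict = theano.function([x], y_hat)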
Example #27
    def __init__(self, dimension, input_size, embed_input=False, **kwargs):
        super(SimpleEncoder, self).__init__(**kwargs)
        if embed_input:
            self.embedder = LookupTable(input_size, dimension)
        else:
            self.embedder = Linear(input_size, dimension)
        self.transform = MLP([Tanh()], [dimension, dimension])
        self.fork = Fork(['inputs'],
                         dimension,
                         output_dims=[dimension],
                         prototype=Linear(dimension, dimension))
        encoder = Bidirectional(
            SimpleRecurrent(dim=dimension, activation=Tanh()))

        self.encoder = encoder
        self.children = [encoder, self.embedder, self.transform, self.fork]
Example #28
    def __init__(self,
                 num_input_words,
                 emb_dim,
                 dim,
                 vocab,
                 lookup=None,
                 translate=True,
                 normalize=True,
                 **kwargs):

        if num_input_words > 0:
            logger.info("Restricting def vocab to " + str(num_input_words))
            self._num_input_words = num_input_words
        else:
            self._num_input_words = vocab.size()

        self._vocab = vocab
        self._translate = translate
        self._normalize = normalize

        children = []

        if lookup is None:
            logger.info("emb_dim={}".format(emb_dim))
            self._def_lookup = LookupTable(self._num_input_words,
                                           emb_dim,
                                           name='def_lookup')
        else:
            self._def_lookup = lookup

        # A translation layer makes sense for a shared lookup table, where it
        # preconditions the embeddings; it is redundant otherwise (WH = W').
        # TODO(kudkudak): Refactor redundant translate parameter
        if self._translate:
            if emb_dim == dim:
                raise Exception("Redundant layer")

            self._def_translate = Linear(emb_dim, dim, name='def_translate')
            children.extend([self._def_translate])
        else:
            if emb_dim != dim:
                raise Exception("Please pass translate=True if emb_dim != dim")

        children.append(self._def_lookup)

        super(MeanPoolReadDefinitions, self).__init__(children=children,
                                                      **kwargs)
Example #29
def nn_fprop(x,
             x_mask,
             y,
             y_mask,
             lens,
             vocab_size,
             hidden_size,
             num_layers,
             model,
             boosting=False,
             **kwargs):
    lookup = LookupTable(length=vocab_size, dim=hidden_size)
    initialize([lookup])
    h = lookup.apply(x)
    first = True
    for i in range(num_layers):
        if model == 'rnn':
            h = rnn_layer(hidden_size,
                          h,
                          i,
                          x_mask=x_mask,
                          first=first,
                          **kwargs)
        elif model == 'gru':
            h = gru_layer(hidden_size,
                          h,
                          i,
                          x_mask=x_mask,
                          first=first,
                          **kwargs)
        elif model == 'lstm':
            h = lstm_layer(hidden_size,
                           h,
                           i,
                           x_mask=x_mask,
                           first=first,
                           **kwargs)
        else:
            print("model must be one of 'rnn', 'gru', or 'lstm'")
            sys.exit(1)
        first = False

    return softmax_layer(h, y, x_mask, y_mask, lens, vocab_size, hidden_size,
                         boosting)
Example #30
 def build_model(self, x, config):
     logger.info('building %s model for: %s ', self.nn_model, self.name)
     vocabsize = self.get_vocab_size()
     logger.info('%s vocab size is: %d', self.name, vocabsize)
     self.embeddings, self.dim_emb = self.get_embeddings()
     if self.tune_tune:
         logger.info('%s lookuptable with size (%d, %d) will be tuned.',
                     self.name, vocabsize, self.dim_emb)
         lookup = LookupTable(length=vocabsize, dim=self.dim_emb)
         lookup.allocate()
         #             add_role(lookup.W, WEIGHT)
         lookup.W.name = 'lt.W'
     else:
         logger.info('%s lookuptable with size (%d, %d) will NOT be tuned.',
                     self.name, vocabsize, self.dim_emb)
         lookup = MyLookupTable(length=vocabsize, dim=self.dim_emb)
         lookup.allocate()
     lookup.name = self.name + 'lookuptable'
     lookup.W.set_value(self.embeddings)
     xemb = lookup.apply(x)
     xemb = debug_print(xemb, 'xemb', False)
     if 'cnn' in self.nn_model:
         logger.info('CNN')
         feature_vec, feature_vec_len = create_cnn_general(
             xemb, self.dim_emb, self.max_len, config, self.name)
     elif self.nn_model == 'lstm':
         feature_vec, feature_vec_len = create_lstm(xemb, self.dim_emb,
                                                    False, config,
                                                    self.name)
     elif self.nn_model == 'bilstm':
         feature_vec, feature_vec_len = create_lstm(xemb, self.dim_emb,
                                                    True, config, self.name)
     elif self.nn_model == 'rnn':
         feature_vec, feature_vec_len = create_rnn(xemb, self.dim_emb,
                                                   config, self.name)
     elif self.nn_model == 'ff':
         feature_vec, feature_vec_len = create_ff(xemb, self.dim_emb,
                                                  self.max_len, config)
     elif self.nn_model == 'mean':
         feature_vec, feature_vec_len = create_mean(xemb, self.dim_emb,
                                                    self.max_len, config)
     return feature_vec, feature_vec_len