def __init__(self, x, y, vocab_size, hidden_size, num_layers, pretrained_embeds=None):
        """
        Implements a neural language model using an LSTM.
        Word y_n+1 ~ Softmax(U * h_n)
        :param x A minibatch: each row is an instance (a sequence),
            with batch_size rows
        :param y x shifted by 1, which are the target words to predict
            for the language modeling objective based on the hidden LSTM
            state
        :param vocab_size The number of types in the training data
        :param hidden_size The dimensionality of the word embeddings
            (and of the LSTM hidden state)
        :param num_layers The number of stacked LSTM layers
        :param pretrained_embeds Pretrained embeddings for initialization, as an ndarray
        """
        self.vocab_size = vocab_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        # Initialize the word embedding table.  If we have pretrained embeddings, we use those
        self.word_embedding_lookup = LookupTable(length=vocab_size, dim=hidden_size, name="word_embeddings")
        if pretrained_embeds is None:
            initialize(self.word_embedding_lookup, 0.8)
        else:
            assert pretrained_embeds.shape[0] == vocab_size and pretrained_embeds.shape[1] == hidden_size
            self.word_embedding_lookup.weights_init = Constant(pretrained_embeds)
            self.word_embedding_lookup.biases_init = Constant(0)
            self.word_embedding_lookup.initialize()

        self.word_embeddings = self.word_embedding_lookup.W

        self.y_hat, self.cost, self.cells = self.nn_fprop(x, y, num_layers)
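
The following is a minimal, self-contained sketch (not this snippet's actual nn_fprop; all names and dimensions are illustrative) of how such a lookup -> LSTM -> softmax language model is typically wired with the Blocks bricks used throughout these examples:

from theano import tensor
from blocks.bricks import Linear, NDimensionalSoftmax
from blocks.bricks.recurrent import LSTM
from blocks.bricks.lookup import LookupTable
from blocks.initialization import IsotropicGaussian, Constant

vocab_size, hidden_size = 100, 32
x = tensor.lmatrix('features')   # (batch, time) word indices
y = tensor.lmatrix('targets')    # x shifted by one position

lookup = LookupTable(length=vocab_size, dim=hidden_size,
                     weights_init=IsotropicGaussian(0.01), biases_init=Constant(0))
to_gates = Linear(hidden_size, 4 * hidden_size, name='to_gates',
                  weights_init=IsotropicGaussian(0.01), biases_init=Constant(0))
lstm = LSTM(hidden_size, weights_init=IsotropicGaussian(0.01), biases_init=Constant(0))
to_vocab = Linear(hidden_size, vocab_size, name='to_vocab',
                  weights_init=IsotropicGaussian(0.01), biases_init=Constant(0))
softmax = NDimensionalSoftmax()
for brick in (lookup, to_gates, lstm, to_vocab):
    brick.initialize()

embeddings = lookup.apply(x.T)                    # time as the first dimension
hidden, cells = lstm.apply(to_gates.apply(embeddings))
logits = to_vocab.apply(hidden)
cost = softmax.categorical_cross_entropy(y.T, logits, extra_ndim=1).mean()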
Example #2
    def __init__(self, num_outputs=None, feedback_dim=None, **kwargs):
        super(LookupFeedback, self).__init__(**kwargs)
        update_instance(self, locals())

        self.lookup = LookupTable(num_outputs, feedback_dim,
                                  weights_init=self.weights_init)
        self.children = [self.lookup]
Example #3
    def __init__(self, num_outputs=None, feedback_dim=None, **kwargs):
        self.num_outputs = num_outputs
        self.feedback_dim = feedback_dim

        self.lookup = LookupTable(num_outputs, feedback_dim)
        children = [self.lookup] + kwargs.get('children', [])
        super(LookupFeedback, self).__init__(children=children, **kwargs)
Example #4
    def __init__(self, emb_dim, dim, num_input_words, 
                 num_output_words, vocab, 
                 **kwargs):
        if emb_dim == 0:
            emb_dim = dim
        if num_input_words == 0:
            num_input_words = vocab.size()
        if num_output_words == 0:
            num_output_words = vocab.size()

        self._num_input_words = num_input_words
        self._num_output_words = num_output_words
        self._vocab = vocab

        self._word_to_id = WordToIdOp(self._vocab)

        children = []

        self._main_lookup = LookupTable(self._num_input_words, emb_dim, name='main_lookup')
        self._encoder_fork = Linear(emb_dim, 4 * dim, name='encoder_fork')
        self._encoder_rnn = LSTM(dim, name='encoder_rnn')
        self._decoder_fork = Linear(emb_dim, 4 * dim, name='decoder_fork')
        self._decoder_rnn = LSTM(dim, name='decoder_rnn')
        children.extend([self._main_lookup,
                         self._encoder_fork, self._encoder_rnn,
                         self._decoder_fork, self._decoder_rnn])
        self._pre_softmax = Linear(dim, self._num_output_words)
        self._softmax = NDimensionalSoftmax()
        children.extend([self._pre_softmax, self._softmax])

        super(LanguageModel, self).__init__(children=children, **kwargs)
    def __init__(self, vocab_size, embedding_dim, state_dim, **kwargs):
        super(BidirectionalEncoder, self).__init__(**kwargs)
        self.vocab_size = vocab_size
        self.embedding_dim = embedding_dim
        self.state_dim = state_dim

        self.lookup = LookupTable(name='embeddings')
        self.bidir = BidirectionalWMT15(
            GatedRecurrent(activation=Tanh(), dim=state_dim))
        self.fwd_fork = Fork([
            name
            for name in self.bidir.prototype.apply.sequences if name != 'mask'
        ],
                             prototype=Linear(),
                             name='fwd_fork')
        self.back_fork = Fork([
            name
            for name in self.bidir.prototype.apply.sequences if name != 'mask'
        ],
                              prototype=Linear(),
                              name='back_fork')

        self.children = [
            self.lookup, self.bidir, self.fwd_fork, self.back_fork
        ]
Example #6
    def __init__(self,
                 num_input_words,
                 emb_dim,
                 dim,
                 vocab,
                 lookup=None,
                 fork_and_rnn=None,
                 **kwargs):

        if num_input_words > 0:
            logger.info("Restricting def vocab to " + str(num_input_words))
            self._num_input_words = num_input_words
        else:
            self._num_input_words = vocab.size()

        self._vocab = vocab

        children = []

        if lookup is None:
            self._def_lookup = LookupTable(self._num_input_words,
                                           emb_dim,
                                           name='def_lookup')
        else:
            self._def_lookup = lookup

        if fork_and_rnn is None:
            self._def_fork = Linear(emb_dim, 4 * dim, name='def_fork')
            self._def_rnn = LSTM(dim, name='def_rnn')
        else:
            self._def_fork, self._def_rnn = fork_and_rnn

        children.extend([self._def_lookup, self._def_fork, self._def_rnn])

        super(LSTMReadDefinitions, self).__init__(children=children, **kwargs)
Example #7
File: model.py  Project: ishaansharma/DCNMT
    def __init__(self, vocab_size, embedding_dim, dgru_state_dim, dgru_depth,
                 **kwargs):
        super(Decimator, self).__init__(**kwargs)

        self.vocab_size = vocab_size
        self.embedding_dim = embedding_dim
        self.dgru_state_dim = dgru_state_dim
        self.lookup = LookupTable(name='embeddings')
        self.dgru_depth = dgru_depth
        # representation
        self.dgru = RecurrentStack([
            DGRU(activation=Tanh(), dim=self.dgru_state_dim)
            for _ in range(dgru_depth)
        ],
                                   skip_connections=True)
        # importance of this representation
        self.bidir_w = Bidirectional(RecurrentWithFork(
            DGRU(activation=Tanh(), dim=self.dgru_state_dim // 2),
            self.embedding_dim,
            name='src_word_with_fork'),
                                     name='bidir_src_word_encoder')

        self.gru_fork = Fork(
            [name for name in self.dgru.apply.sequences if name != 'mask'],
            prototype=Linear(),
            name='gru_fork')
        # map to a energy scalar
        self.wl = Linear(input_dim=dgru_state_dim, output_dim=1)

        self.children = [
            self.lookup, self.dgru, self.gru_fork, self.bidir_w, self.wl
        ]
Example #8
 def __init__(self,
              vocab_size,
              embedding_dim,
              igru_state_dim,
              emitter=None,
              feedback_brick=None,
              merge=None,
              merge_prototype=None,
              post_merge=None,
              merged_dim=None,
              igru=None,
              **kwargs):
     self.igru = igru
     self.lookup = LookupTable(name='embeddings')
     self.vocab_size = vocab_size
     self.igru_state_dim = igru_state_dim
     self.gru_to_softmax = Linear(input_dim=igru_state_dim,
                                  output_dim=vocab_size)
     self.embedding_dim = embedding_dim
     self.gru_fork = Fork([
         name for name in self.igru.apply.sequences
         if name != 'mask' and name != 'input_states'
     ],
                          prototype=Linear(),
                          name='gru_fork')
     kwargs['children'] = [
         self.igru, self.lookup, self.gru_to_softmax, self.gru_fork
     ]
     super(Interpolator, self).__init__(emitter=emitter,
                                        feedback_brick=feedback_brick,
                                        merge=merge,
                                        merge_prototype=merge_prototype,
                                        post_merge=post_merge,
                                        merged_dim=merged_dim,
                                        **kwargs)
Example #9
File: model.py  Project: sovr610/mimicry.ai
    def __init__(
            self,
            encoder_type,
            num_characters,
            input_dim,
            encoder_dim,
            **kwargs):
        assert encoder_type in [None, 'bidirectional']
        self.encoder_type = encoder_type
        super(Encoder, self).__init__(**kwargs)

        self.children = []

        if encoder_type in ['lookup', 'bidirectional']:
            self.embed_label = LookupTable(
                num_characters,
                input_dim,
                name='embed_label')
            self.children += [
                self.embed_label]
        else:
            # If there is no encoder.
            assert num_characters == input_dim

        if encoder_type == 'bidirectional':
            transition = RecurrentWithFork(
                GatedRecurrent(dim=encoder_dim).apply,
                input_dim, name='encoder_transition')
            self.encoder = Bidirectional(transition, name='encoder')
            self.children.append(self.encoder)
    def __init__(self, morpho_idxs, masks, word_idxs, morpho_vocab_size,
                 hidden_size, word_embeds):
        """
        Implements a morpheme-level prior by computing the sum of KL-Div
        of the elements of the morpheme embeddings and the word embeddings
        (where these elements are in [0,1] and are taken as Bernoulli dists).
        :param morpho_idxs A 3D tensor of batch_size x seq_length x max_morphemes_per_word
            Where the 3rd dimension is morpheme indices, padded with 0's so all words have
            the same morpheme decomposition length
        :param masks A 4D tensor of bits which select which values in morpho_idxs are
            padding and which are actual morphemes.  4D is needed for broadcasting
        :param word_idxs A 2D matrix of batch_size x seq_length of word indices
        :param morpho_vocab_size the number of morpheme types seen in training data
        :param hidden_size the dimensionality of morpheme / word embeddings
        :param word_embeds the unconstrained word embeddings from the language model
        """
        self.morpho_vocab_size = morpho_vocab_size
        self.hidden_size = hidden_size
        self.word_embed_lookup = word_embeds  # These are the unconstrained word embeddings

        self.morpho_embed_lookup = LookupTable(length=morpho_vocab_size,
                                               dim=hidden_size,
                                               name="morpho_embeddings")
        initialize(self.morpho_embed_lookup, 0.8)

        self.cost = self.compute_cost(morpho_idxs, masks, word_idxs)
        self.cost.name = "morpho_cost"

        self.norm = self.morpho_embed_lookup.W.norm(2)
        self.norm.name = "morpho_embed_norm"
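
As a reading aid, here is a hedged sketch of the kind of computation the docstring describes (this is not the class's actual compute_cost; the clipping and the direction of the KL term are illustrative assumptions, and shapes follow the parameter descriptions above): each embedding element is treated as a Bernoulli parameter and the element-wise KL divergences between the word embedding and each non-padding morpheme embedding are summed.

from theano import tensor

def bernoulli_kl(p, q, eps=1e-6):
    # KL(Bern(p) || Bern(q)) computed element-wise on values clipped into (0, 1)
    p = tensor.clip(p, eps, 1 - eps)
    q = tensor.clip(q, eps, 1 - eps)
    return p * tensor.log(p / q) + (1 - p) * tensor.log((1 - p) / (1 - q))

def morpho_prior_cost(word_embeds, morpho_embeds, masks):
    # word_embeds:   batch x seq_len x 1 x hidden   (broadcasts over the morpheme axis)
    # morpho_embeds: batch x seq_len x max_morphemes_per_word x hidden
    # masks:         batch x seq_len x max_morphemes_per_word x 1 (1 = real morpheme, 0 = padding)
    kl = bernoulli_kl(morpho_embeds, word_embeds)
    return (kl * masks).sum()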
Example #11
    def __init__(self, num_outputs=None, feedback_dim=None, **kwargs):
        self.num_outputs = num_outputs
        self.feedback_dim = feedback_dim

        self.lookup = LookupTable(num_outputs, feedback_dim)
        children = [self.lookup]
        kwargs.setdefault('children', []).extend(children)
        super(LookupFeedback, self).__init__(**kwargs)
    def __init__(self, num_outputs=None, feedback_dim=None, **kwargs):
        super(LookupFeedback, self).__init__(**kwargs)
        self.num_outputs = num_outputs
        self.feedback_dim = feedback_dim

        self.lookup = LookupTable(num_outputs, feedback_dim,
                                  weights_init=self.weights_init)
        self.children = [self.lookup]
Example #13
File: rnn_model.py  Project: Rene90/dl4nlp
def create_rnn(hidden_dim, vocab_dim,mode="rnn"):
    # input
    x = tensor.imatrix('inchar')
    y = tensor.imatrix('outchar')

    # 
    W = LookupTable(
        name = "W1",
        #dim = hidden_dim*4,
        dim = hidden_dim,
        length = vocab_dim,
        weights_init = initialization.IsotropicGaussian(0.01),
        biases_init = initialization.Constant(0)
    )
    if mode == "lstm":
        # Long Short Term Memory
        H = LSTM(
            hidden_dim, 
            name = 'H',
            weights_init = initialization.IsotropicGaussian(0.01),
            biases_init = initialization.Constant(0.0)
        )
    else:
        # recurrent history weight
        H = SimpleRecurrent(
            name = "H",
            dim = hidden_dim,
            activation = Tanh(),
            weights_init = initialization.IsotropicGaussian(0.01)
        )
    # 
    S = Linear(
        name = "W2",
        input_dim = hidden_dim,
        output_dim = vocab_dim,
        weights_init = initialization.IsotropicGaussian(0.01),
        biases_init = initialization.Constant(0)
    )

    A = NDimensionalSoftmax(
        name = "softmax"
    )

    initLayers([W,H,S])
    activations = W.apply(x)
    hiddens = H.apply(activations)#[0]
    activations2 = S.apply(hiddens)
    y_hat = A.apply(activations2, extra_ndim=1)
    cost = A.categorical_cross_entropy(y, activations2, extra_ndim=1).mean()

    cg = ComputationGraph(cost)
    #print VariableFilter(roles=[WEIGHT])(cg.variables)
    #W1,H,W2 = VariableFilter(roles=[WEIGHT])(cg.variables)

    layers = (x, W, H, S, A, y)

    return  cg, layers, y_hat, cost
Example #14
File: model.py  Project: ishaansharma/DCNMT
    def __init__(self,
                 vocab_size,
                 embedding_dim,
                 igru_state_dim,
                 igru_depth,
                 trg_dgru_depth,
                 emitter,
                 feedback_brick,
                 merge=None,
                 merge_prototype=None,
                 post_merge=None,
                 **kwargs):
        merged_dim = igru_state_dim
        if not merge:
            merge = Merge(input_names=kwargs['source_names'],
                          prototype=merge_prototype)
        if not post_merge:
            post_merge = Bias(dim=merged_dim)

        # for compatibility
        if igru_depth == 1:
            self.igru = IGRU(dim=igru_state_dim)
        else:
            self.igru = RecurrentStack(
                [IGRU(dim=igru_state_dim, name='igru')] + [
                    UpperIGRU(dim=igru_state_dim,
                              activation=Tanh(),
                              name='upper_igru' + str(i))
                    for i in range(1, igru_depth)
                ],
                skip_connections=True)
        self.embedding_dim = embedding_dim
        self.emitter = emitter
        self.feedback_brick = feedback_brick
        self.merge = merge
        self.post_merge = post_merge
        self.merged_dim = merged_dim
        self.igru_depth = igru_depth
        self.trg_dgru_depth = trg_dgru_depth
        self.lookup = LookupTable(name='embeddings')
        self.vocab_size = vocab_size
        self.igru_state_dim = igru_state_dim
        self.gru_to_softmax = Linear(input_dim=igru_state_dim,
                                     output_dim=vocab_size)
        self.gru_fork = Fork([
            name for name in self.igru.apply.sequences
            if name != 'mask' and name != 'input_states'
        ],
                             prototype=Linear(),
                             name='gru_fork')

        children = [
            self.emitter, self.feedback_brick, self.merge, self.post_merge,
            self.igru, self.lookup, self.gru_to_softmax, self.gru_fork
        ]
        kwargs.setdefault('children', []).extend(children)
        super(Interpolator, self).__init__(**kwargs)
Example #15
def test_lookup_table():
    lt = LookupTable(5, 3)
    lt.allocate()

    lt.W.set_value(numpy.arange(15).reshape(5, 3).astype(theano.config.floatX))

    x = tensor.lmatrix("x")
    y = lt.apply(x)
    f = theano.function([x], [y])

    x_val = [[1, 2], [0, 3]]
    desired = numpy.array([[[3, 4, 5], [6, 7, 8]], [[0, 1, 2], [9, 10, 11]]],
                          dtype=theano.config.floatX)
    assert_equal(f(x_val)[0], desired)

    # Test get_dim
    assert_equal(lt.get_dim(lt.apply.inputs[0]), 0)
    assert_equal(lt.get_dim(lt.apply.outputs[0]), lt.dim)
    assert_raises(ValueError, lt.get_dim, 'random_name')

    # Test feedforward interface
    assert lt.input_dim == 0
    assert lt.output_dim == 3
    lt.output_dim = 4
    assert lt.output_dim == 4

    def assign_input_dim():
        lt.input_dim = 11

    assert_raises(ValueError, assign_input_dim)
    lt.input_dim = 0
Example #16
    def create_model(self):
        input_dim = self.input_dim
        x = self.x
        y = self.y
        p = self.p
        mask = self.mask
        hidden_dim = self.hidden_dim
        embedding_dim = self.embedding_dim
        lookup = LookupTable(self.dict_size,
                             embedding_dim,
                             weights_init=IsotropicGaussian(0.001),
                             name='LookupTable')
        x_to_h = Linear(embedding_dim,
                        hidden_dim * 4,
                        name='x_to_h',
                        weights_init=IsotropicGaussian(0.001),
                        biases_init=Constant(0.0))
        lstm = LSTM(hidden_dim,
                    name='lstm',
                    weights_init=IsotropicGaussian(0.001),
                    biases_init=Constant(0.0))
        h_to_o = MLP([Logistic()], [hidden_dim, 1],
                     weights_init=IsotropicGaussian(0.001),
                     biases_init=Constant(0),
                     name='h_to_o')

        lookup.initialize()
        x_to_h.initialize()
        lstm.initialize()
        h_to_o.initialize()

        embed = lookup.apply(x).reshape(
            (x.shape[0], x.shape[1], self.embedding_dim))
        embed.name = "embed_vec"
        x_transform = x_to_h.apply(embed.transpose(1, 0, 2))
        x_transform.name = "Transformed X"
        self.lookup = lookup
        self.x_to_h = x_to_h
        self.lstm = lstm
        self.h_to_o = h_to_o

        #if mask is None:
        h, c = lstm.apply(x_transform)
        #else:
        #h, c = lstm.apply(x_transform, mask=mask)
        h.name = "hidden_state"
        c.name = "cell state"
        # only values of hidden units of the last timeframe are used for
        # the classification
        indices = T.sum(mask, axis=0) - 1
        rel_hid = h[indices, T.arange(h.shape[1])]
        out = self.h_to_o.apply(rel_hid)

        probs = out
        return probs
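
The indexing trick above (summing the mask to find each sequence's length and gathering the hidden state at that step) can be seen concretely in this small NumPy sketch, which is independent of the model code:

import numpy

h = numpy.arange(2 * 3 * 1).reshape(2, 3, 1)      # (time=2, batch=3, hidden=1)
mask = numpy.array([[1, 1, 1],
                    [0, 1, 1]], dtype='float32')   # sequence lengths: 1, 2, 2
indices = mask.sum(axis=0).astype('int64') - 1     # [0, 1, 1]
last_h = h[indices, numpy.arange(h.shape[1])]      # picks h[0,0], h[1,1], h[1,2]
print(last_h.squeeze(-1))                          # [0 4 5]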
Example #17
def nn_fprop(x, y, vocab_size, hidden_size, num_layers, model):
    lookup = LookupTable(length=vocab_size, dim=hidden_size)
    initialize([lookup])
    h = lookup.apply(x)
    for i in range(num_layers):
        if model == 'rnn':
            h = rnn_layer(hidden_size, h, i)
        if model == 'gru':
            h = gru_layer(hidden_size, h, i)
        if model == 'lstm':
            h = lstm_layer(hidden_size, h, i)
    return softmax_layer(h, y, vocab_size, hidden_size)
Example #18
def nn_fprop(x, y, vocab_size, hidden_size, num_layers, model):
    lookup = LookupTable(length=vocab_size, dim=hidden_size)
    initialize([lookup])
    h = lookup.apply(x)
    for i in range(num_layers):
        if model == 'rnn':
            h = rnn_layer(hidden_size, h, i)
        if model == 'gru':
            h = gru_layer(hidden_size, h, i)
        if model == 'lstm':
            h = lstm_layer(hidden_size, h, i)
    return softmax_layer(h, y, vocab_size, hidden_size)
Example #19
 def __init__(self, vocab_size, topical_embedding_dim, state_dim,word_num,batch_size,
              **kwargs):
     super(topicalq_transformer, self).__init__(**kwargs)
     self.vocab_size = vocab_size
     self.word_embedding_dim = topical_embedding_dim
     self.state_dim = state_dim
     self.word_num = word_num
     self.batch_size = batch_size
     self.look_up = LookupTable(name='topical_embeddings')
     self.transformer = MLP(activations=[Tanh()],
                            dims=[self.word_embedding_dim * self.word_num, self.state_dim],
                            name='topical_transformer')
     self.children = [self.look_up, self.transformer]
Example #20
    def build_model(self, x, config):
        logger.info('building %s model for: %s ', self.nn_model, self.name)
        vocabsize = self.get_vocab_size()
        logger.info('%s vocab size is: %d', self.name, vocabsize)
        self.embeddings, self.dim_emb = self.get_embeddings() 
        if self.tune_tune:
            logger.info('%s lookuptable with size (%d, %d) will be tuned.', self.name, vocabsize, self.dim_emb)
            lookup = LookupTable(length=vocabsize, dim=self.dim_emb)
            lookup.allocate()
#             add_role(lookup.W, WEIGHT)
            lookup.W.name = 'lt.W'
        else:
            logger.info('%s lookuptable with size (%d, %d) will NOT be tuned.', self.name, vocabsize, self.dim_emb)
            lookup = MyLookupTable(length=vocabsize, dim=self.dim_emb)
            lookup.allocate()
        lookup.name = self.name + 'lookuptable'
        lookup.W.set_value(self.embeddings)
        xemb = lookup.apply(x)
        xemb = debug_print(xemb, 'xemb', False)
        if 'cnn' in self.nn_model:
            logger.info('CNN')
            feature_vec, feature_vec_len = create_cnn_general(xemb, self.dim_emb, self.max_len, config, self.name)
        elif self.nn_model == 'lstm':
            feature_vec, feature_vec_len = create_lstm(xemb, self.dim_emb, False, config, self.name)
        elif self.nn_model == 'bilstm':
            feature_vec, feature_vec_len = create_lstm(xemb, self.dim_emb, True, config, self.name)
        elif self.nn_model == 'rnn':
            feature_vec, feature_vec_len = create_rnn(xemb, self.dim_emb, config, self.name)
        elif self.nn_model == 'ff':
            feature_vec, feature_vec_len = create_ff(xemb, self.dim_emb, self.max_len, config)
        elif self.nn_model == 'mean':
            feature_vec, feature_vec_len = create_mean(xemb, self.dim_emb, self.max_len, config)
        return feature_vec, feature_vec_len
Example #21
def test_lookup_table():
    lt = LookupTable(5, 3)
    lt.allocate()

    lt.W.set_value(numpy.arange(15).reshape(5, 3).astype(theano.config.floatX))

    x = tensor.lmatrix("x")
    y = lt.apply(x)
    f = theano.function([x], [y])

    x_val = [[1, 2], [0, 3]]
    desired = numpy.array([[[3, 4, 5], [6, 7, 8]], [[0, 1, 2], [9, 10, 11]]],
                          dtype=theano.config.floatX)
    assert_equal(f(x_val)[0], desired)
Example #22
def test_lookup_table():
    lt = LookupTable(5, 3)
    lt.allocate()

    lt.W.set_value(numpy.arange(15).reshape(5, 3).astype(theano.config.floatX))

    x = tensor.lmatrix("x")
    y = lt.apply(x)
    f = theano.function([x], [y])

    x_val = [[1, 2], [0, 3]]
    desired = numpy.array([[[3, 4, 5], [6, 7, 8]], [[0, 1, 2], [9, 10, 11]]],
                          dtype=theano.config.floatX)
    assert_equal(f(x_val)[0], desired)

    # Test get_dim
    assert_equal(lt.get_dim(lt.apply.inputs[0]), 0)
    assert_equal(lt.get_dim(lt.apply.outputs[0]), lt.dim)
    assert_raises(ValueError, lt.get_dim, 'random_name')

    # Test feedforward interface
    assert lt.input_dim == 0
    assert lt.output_dim == 3
    lt.output_dim = 4
    assert lt.output_dim == 4

    def assign_input_dim():
        lt.input_dim = 11
    assert_raises(ValueError, assign_input_dim)
    lt.input_dim = 0
Example #23
 def __init__(self, vocab_size, embedding_dim, n_layers, skip_connections,
              state_dim, **kwargs):
     """Sole constructor.
     
     Args:
         vocab_size (int): Source vocabulary size
         embedding_dim (int): Dimension of the embedding layer
         n_layers (int): Number of layers. Layers share the same
                         weight matrices.
         skip_connections (bool): Skip connections connect the
                                  source word embeddings directly 
                                  with deeper layers to propagate 
                                  the gradient more efficiently
         state_dim (int): Number of hidden units in the recurrent
                          layers.
     """
     super(DeepBidirectionalEncoder, self).__init__(**kwargs)
     self.vocab_size = vocab_size
     self.embedding_dim = embedding_dim
     self.n_layers = n_layers
     self.state_dim = state_dim
     self.skip_connections = skip_connections
     self.lookup = LookupTable(name='embeddings')
     self.bidirs = []
     self.fwd_forks = []
     self.back_forks = []
     for i in xrange(self.n_layers):
         bidir = BidirectionalWMT15(GatedRecurrent(activation=Tanh(),
                                                   dim=state_dim),
                                    name='bidir%d' % i)
         self.bidirs.append(bidir)
         self.fwd_forks.append(
             Fork([
                 name for name in bidir.prototype.apply.sequences
                 if name != 'mask'
             ],
                  prototype=Linear(),
                  name='fwd_fork%d' % i))
         self.back_forks.append(
             Fork([
                 name for name in bidir.prototype.apply.sequences
                 if name != 'mask'
             ],
                  prototype=Linear(),
                  name='back_fork%d' % i))
     self.children = [self.lookup] \
                     + self.bidirs \
                     + self.fwd_forks \
                     + self.back_forks
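
A hedged instantiation sketch for the constructor documented above: the dimensions are illustrative, the standard Blocks initialization pattern is assumed rather than taken from this project, and it relies on the class's allocation config (defined elsewhere in the project) to push dimensions to its children.

from blocks.initialization import IsotropicGaussian, Constant, Orthogonal

encoder = DeepBidirectionalEncoder(vocab_size=30000, embedding_dim=600,
                                   n_layers=2, skip_connections=False,
                                   state_dim=1000)
encoder.weights_init = IsotropicGaussian(0.01)
encoder.biases_init = Constant(0)
encoder.push_initialization_config()
for bidir in encoder.bidirs:
    bidir.prototype.weights_init = Orthogonal()
encoder.initialize()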
Example #24
def create_rnn(hidden_dim, vocab_dim, mode="rnn"):
    # input
    x = tensor.imatrix('inchar')
    y = tensor.imatrix('outchar')

    #
    W = LookupTable(
        name="W1",
        #dim = hidden_dim*4,
        dim=hidden_dim,
        length=vocab_dim,
        weights_init=initialization.IsotropicGaussian(0.01),
        biases_init=initialization.Constant(0))
    if mode == "lstm":
        # Long Short Term Memory
        H = LSTM(hidden_dim,
                 name='H',
                 weights_init=initialization.IsotropicGaussian(0.01),
                 biases_init=initialization.Constant(0.0))
    else:
        # recurrent history weight
        H = SimpleRecurrent(
            name="H",
            dim=hidden_dim,
            activation=Tanh(),
            weights_init=initialization.IsotropicGaussian(0.01))
    #
    S = Linear(name="W2",
               input_dim=hidden_dim,
               output_dim=vocab_dim,
               weights_init=initialization.IsotropicGaussian(0.01),
               biases_init=initialization.Constant(0))

    A = NDimensionalSoftmax(name="softmax")

    initLayers([W, H, S])
    activations = W.apply(x)
    hiddens = H.apply(activations)  #[0]
    activations2 = S.apply(hiddens)
    y_hat = A.apply(activations2, extra_ndim=1)
    cost = A.categorical_cross_entropy(y, activations2, extra_ndim=1).mean()

    cg = ComputationGraph(cost)
    #print VariableFilter(roles=[WEIGHT])(cg.variables)
    #W1,H,W2 = VariableFilter(roles=[WEIGHT])(cg.variables)

    layers = (x, W, H, S, A, y)

    return cg, layers, y_hat, cost
    def __init__(self, batch_size, num_subwords, num_words, subword_embedding_size, input_vocab_size,
                 subword_RNN_hidden_state_size, table_width=0.08, init_type='xavier', **kwargs):

        super(LSTMCompositionalLayer, self).__init__(**kwargs)

        self.batch_size = batch_size
        self.num_subwords = num_subwords # number of subwords which make up a word
        self.num_words = num_words  # number of words in the sentence
        self.subword_embedding_size = subword_embedding_size
        self.input_vocab_size = input_vocab_size
        self.subword_RNN_hidden_state_size = subword_RNN_hidden_state_size
        self.table_width = table_width

        # create the look up table
        self.lookup = LookupTable(length=self.input_vocab_size, dim=self.subword_embedding_size, name='input_lookup')
        self.lookup.weights_init = Uniform(width=table_width)
        self.lookup.biases_init = Constant(0)

        if init_type == 'xavier':
            linear_init = XavierInitializationOriginal(self.subword_embedding_size, self.subword_RNN_hidden_state_size)
            lstm_init = XavierInitializationOriginal(self.subword_embedding_size, self.subword_RNN_hidden_state_size)
        else:  # default is gaussian
            linear_init = IsotropicGaussian()
            lstm_init = IsotropicGaussian()

        # The `inputs` are then split in this order: Input gates, forget gates, cells and output gates
        self.linear_forward = Linear(input_dim=self.subword_embedding_size, output_dim=self.subword_RNN_hidden_state_size * 4,
                                     name='linear_forward', weights_init=linear_init, biases_init=Constant(0.0))

        self.compositional_subword_to_word_RNN_forward = LSTM(
            dim=self.subword_RNN_hidden_state_size, activation=Tanh(), name='subword_RNN_forward',
            weights_init=lstm_init, biases_init=Constant(0.0))

        self.children = [self.lookup, self.linear_forward, self.compositional_subword_to_word_RNN_forward]
Example #26
File: __init__.py  Project: jfsantos/blocks
    def __init__(self, dimension, alphabet_size, **kwargs):
        super(WordReverser, self).__init__(**kwargs)
        encoder = Bidirectional(
            SimpleRecurrent(dim=dimension, activation=Tanh()))
        fork = Fork([name for name in encoder.prototype.apply.sequences
                    if name != 'mask'])
        fork.input_dim = dimension
        fork.output_dims = [dimension for name in fork.input_names]
        lookup = LookupTable(alphabet_size, dimension)
        transition = SimpleRecurrent(
            activation=Tanh(),
            dim=dimension, name="transition")
        attention = SequenceContentAttention(
            state_names=transition.apply.states,
            attended_dim=2 * dimension, match_dim=dimension, name="attention")
        readout = Readout(
            readout_dim=alphabet_size,
            source_names=[transition.apply.states[0],
                          attention.take_glimpses.outputs[0]],
            emitter=SoftmaxEmitter(name="emitter"),
            feedback_brick=LookupFeedback(alphabet_size, dimension),
            name="readout")
        generator = SequenceGenerator(
            readout=readout, transition=transition, attention=attention,
            name="generator")

        self.lookup = lookup
        self.fork = fork
        self.encoder = encoder
        self.generator = generator
        self.children = [lookup, fork, encoder, generator]
    def __init__(self, batch_size, num_subwords, num_words, subword_embedding_size, input_vocab_size,
                 subword_RNN_hidden_state_size, add_one = True, **kwargs):

        super(CompositionalLayerToyBidirectional, self).__init__(**kwargs)

        self.batch_size = batch_size
        self.num_subwords = num_subwords # number of subwords which make up a word
        self.num_words = num_words  # number of words in the sentence
        self.subword_embedding_size = subword_embedding_size
        self.input_vocab_size = input_vocab_size
        self.subword_RNN_hidden_state_size = subword_RNN_hidden_state_size
        self.add_one = add_one #adds 1 to the backwards embeddings

        # create the look up table
        self.lookup = LookupTable(length=self.input_vocab_size, dim=self.subword_embedding_size, name='input_lookup')
        self.lookup.weights_init = Uniform(width=0.08)
        self.lookup.biases_init = Constant(0)

        # has one RNN which reads the subwords into a word embedding
        self.compositional_subword_to_word_RNN_forward = SimpleRecurrent(
            dim=self.subword_RNN_hidden_state_size, activation=Identity(), name='subword_RNN_forward',
            weights_init=Identity_init())

        self.compositional_subword_to_word_RNN_backward = SimpleRecurrent(
            dim=self.subword_RNN_hidden_state_size, activation=Identity(), name='subword_RNN_backward',
            weights_init=Identity_init())

        self.children = [self.lookup, self.compositional_subword_to_word_RNN_forward,
                         self.compositional_subword_to_word_RNN_backward]
class BidirectionalEncoder(Initializable):
    """Encoder of RNNsearch model."""
    def __init__(self, vocab_size, embedding_dim, state_dim, **kwargs):
        super(BidirectionalEncoder, self).__init__(**kwargs)
        self.vocab_size = vocab_size
        self.embedding_dim = embedding_dim
        self.state_dim = state_dim

        self.lookup = LookupTable(name='embeddings')
        self.bidir = NewBidirectional(
            GatedRecurrent(activation=Tanh(), dim=state_dim))
        self.fwd_fork = Fork([
            name
            for name in self.bidir.prototype.apply.sequences if name != 'mask'
        ],
                             prototype=Linear(),
                             name='fwd_fork')
        self.back_fork = Fork([
            name
            for name in self.bidir.prototype.apply.sequences if name != 'mask'
        ],
                              prototype=Linear(),
                              name='back_fork')

        self.children = [
            self.lookup, self.bidir, self.fwd_fork, self.back_fork
        ]

    def _push_allocation_config(self):
        self.lookup.length = self.vocab_size
        self.lookup.dim = self.embedding_dim

        self.fwd_fork.input_dim = self.embedding_dim
        self.fwd_fork.output_dims = [
            self.bidir.children[0].get_dim(name)
            for name in self.fwd_fork.output_names
        ]
        self.back_fork.input_dim = self.embedding_dim
        self.back_fork.output_dims = [
            self.bidir.children[1].get_dim(name)
            for name in self.back_fork.output_names
        ]

    @application(inputs=['source_sentence', 'source_sentence_mask'],
                 outputs=['representation'])
    def apply(self, source_sentence, source_sentence_mask):
        # Time as first dimension.
        source_sentence = source_sentence.T
        source_sentence_mask = source_sentence_mask.T

        embeddings = self.lookup.apply(source_sentence)

        representation = self.bidir.apply(
            # Conversion to embedding representation here.
            merge(self.fwd_fork.apply(embeddings, as_dict=True),
                  {'mask': source_sentence_mask}),
            merge(self.back_fork.apply(embeddings, as_dict=True),
                  {'mask': source_sentence_mask}))
        self.representation = representation
        return representation
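
A hedged usage sketch for the encoder class above (dimensions and initializers are illustrative; it assumes the NewBidirectional/GatedRecurrent bricks referenced in the class are importable):

from theano import tensor
from blocks.initialization import IsotropicGaussian, Constant, Orthogonal

source_sentence = tensor.lmatrix('source')       # (batch, time) word indices
source_mask = tensor.matrix('source_mask')       # 1.0 for real tokens, 0.0 for padding

encoder = BidirectionalEncoder(vocab_size=30000, embedding_dim=620, state_dim=1000)
encoder.weights_init = IsotropicGaussian(0.01)
encoder.biases_init = Constant(0)
encoder.push_initialization_config()
encoder.bidir.prototype.weights_init = Orthogonal()
encoder.initialize()

# representation has shape (time, batch, 2 * state_dim)
representation = encoder.apply(source_sentence, source_mask)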
Example #29
class LookupFeedback(AbstractFeedback, Initializable):
    """A feedback brick for the case when readouts are integers.

    Stores and retrieves distributed representations of integers.

    Notes
    -----
        Currently works only with lazy initialization
        (can not be initialized with a single constructor call).

    """
    def __init__(self, num_outputs=None, feedback_dim=None, **kwargs):
        super(LookupFeedback, self).__init__(**kwargs)
        update_instance(self, locals())

        self.lookup = LookupTable(num_outputs, feedback_dim,
                                  kwargs.get("weights_init"))
        self.children = [self.lookup]

    def _push_allocation_config(self):
        self.lookup.length = self.num_outputs
        self.lookup.dim = self.feedback_dim

    @application
    def feedback(self, outputs, **kwargs):
        assert self.output_dim == 0
        return self.lookup.lookup(outputs)

    def get_dim(self, name):
        if name == 'feedback':
            return self.feedback_dim
        return super(LookupFeedback, self).get_dim(name)
Example #30
    def __init__(self, num_outputs=None, feedback_dim=None, **kwargs):
        super(LookupFeedback, self).__init__(**kwargs)
        update_instance(self, locals())

        self.lookup = LookupTable(num_outputs, feedback_dim,
                                  kwargs.get("weights_init"))
        self.children = [self.lookup]
Example #31
class LookupFeedback(AbstractFeedback, Initializable):
    """A feedback brick for the case when readout are integers.

    Stores and retrieves distributed representations of integers.

    Notes
    -----
    Currently works only with lazy initialization (can not be initialized
    with a single constructor call).

    """
    def __init__(self, num_outputs=None, feedback_dim=None, **kwargs):
        super(LookupFeedback, self).__init__(**kwargs)
        update_instance(self, locals())

        self.lookup = LookupTable(num_outputs, feedback_dim,
                                  weights_init=self.weights_init)
        self.children = [self.lookup]

    def _push_allocation_config(self):
        self.lookup.length = self.num_outputs
        self.lookup.dim = self.feedback_dim

    @application
    def feedback(self, outputs, **kwargs):
        assert self.output_dim == 0
        return self.lookup.lookup(outputs)

    def get_dim(self, name):
        if name == 'feedback':
            return self.feedback_dim
        return super(LookupFeedback, self).get_dim(name)
    def __init__(self, morpho_idxs, masks, word_idxs, morpho_vocab_size, hidden_size, word_embeds):
        """
        Implements a morpheme-level prior by computing the sum of KL-Div
        of the elements of the morpheme embeddings and the word embeddings
        (where these elements are in [0,1] and are taken as Bernoulli dists).
        :param morpho_idxs A 3D tensor of batch_size x seq_length x max_morphemes_per_word
            Where the 3rd dimension is morpheme indices, padded with 0's so all words have
            the same morpheme decomposition length
        :param masks A 4D tensor of bits which select which values in morpho_idxs are
            padding and which are actual morphemes.  4D is needed for broadcasting
        :param word_idxs A 2D matrix of batch_size x seq_length of word indices
        :param morpho_vocab_size the number of morpheme types seen in training data
        :param hidden_size the dimensionality of morpheme / word embeddings
        :param word_embeds the unconstrained word embeddings from the language model
        """
        self.morpho_vocab_size = morpho_vocab_size
        self.hidden_size = hidden_size
        self.word_embed_lookup = word_embeds # These are the unconstrained word embeddings

        self.morpho_embed_lookup = LookupTable(length=morpho_vocab_size,
                dim=hidden_size, name="morpho_embeddings")
        initialize(self.morpho_embed_lookup, 0.8)

        self.cost = self.compute_cost(morpho_idxs, masks, word_idxs)
        self.cost.name = "morpho_cost"

        self.norm = self.morpho_embed_lookup.W.norm(2)
        self.norm.name = "morpho_embed_norm"
Example #33
class LookupFeedback(AbstractFeedback, Initializable):
    """A feedback brick for the case when readout are integers.

    Stores and retrieves distributed representations of integers.

    """
    def __init__(self, num_outputs=None, feedback_dim=None, **kwargs):
        super(LookupFeedback, self).__init__(**kwargs)
        self.num_outputs = num_outputs
        self.feedback_dim = feedback_dim

        self.lookup = LookupTable(num_outputs,
                                  feedback_dim,
                                  weights_init=self.weights_init)
        self.children = [self.lookup]

    def _push_allocation_config(self):
        self.lookup.length = self.num_outputs
        self.lookup.dim = self.feedback_dim

    @application
    def feedback(self, outputs):
        assert self.output_dim == 0
        return self.lookup.apply(outputs)

    def get_dim(self, name):
        if name == 'feedback':
            return self.feedback_dim
        return super(LookupFeedback, self).get_dim(name)
Example #34
class topicalq_transformer(Initializable):

    def __init__(self, vocab_size, topical_embedding_dim, state_dim,word_num,batch_size,
                 **kwargs):
        super(topicalq_transformer, self).__init__(**kwargs)
        self.vocab_size = vocab_size
        self.word_embedding_dim = topical_embedding_dim
        self.state_dim = state_dim
        self.word_num = word_num
        self.batch_size = batch_size
        self.look_up = LookupTable(name='topical_embeddings')
        self.transformer = MLP(activations=[Tanh()],
                               dims=[self.word_embedding_dim * self.word_num, self.state_dim],
                               name='topical_transformer')
        self.children = [self.look_up, self.transformer]

    def _push_allocation_config(self):
        self.look_up.length = self.vocab_size
        self.look_up.dim = self.word_embedding_dim


    # do we have to push_config? remain unsure
    @application(inputs=['source_topical_word_sequence'],
                 outputs=['topical_embedding'])
    def apply(self, source_topical_word_sequence):
        # Time as first dimension
        source_topical_word_sequence = source_topical_word_sequence.T
        word_topical_embeddings = self.look_up.apply(source_topical_word_sequence)
        word_topical_embeddings = word_topical_embeddings.swapaxes(0, 1)
        # requires testing
        concatenated_topical_embeddings = tensor.reshape(
            word_topical_embeddings,
            [word_topical_embeddings.shape[0],
             word_topical_embeddings.shape[1] * word_topical_embeddings.shape[2]])
        topical_embedding = self.transformer.apply(concatenated_topical_embeddings)
        return topical_embedding
Example #35
    def __init__(self, num_outputs=None, feedback_dim=None, **kwargs):
        self.num_outputs = num_outputs
        self.feedback_dim = feedback_dim

        self.lookup = LookupTable(num_outputs, feedback_dim)
        children = [self.lookup] + kwargs.get('children', [])
        super(LookupFeedback, self).__init__(children=children, **kwargs)
Example #36
    def __init__(self, dimension, alphabet_size, **kwargs):
        super(SimpleGenerator, self).__init__(**kwargs)
        lookup = LookupTable(alphabet_size, dimension)
        transition = SimpleRecurrent(activation=Tanh(),
                                     dim=dimension,
                                     name="transition")
        attention = SequenceContentAttention(
            state_names=transition.apply.states,
            attended_dim=dimension,
            match_dim=dimension,
            name="attention")
        readout = Readout(readout_dim=alphabet_size,
                          source_names=[
                              transition.apply.states[0],
                              attention.take_glimpses.outputs[0]
                          ],
                          emitter=SoftmaxEmitter(name="emitter"),
                          feedback_brick=LookupFeedback(
                              alphabet_size, dimension),
                          name="readout")
        generator = SequenceGenerator(readout=readout,
                                      transition=transition,
                                      attention=attention,
                                      name="generator")

        self.lookup = lookup
        self.generator = generator
        self.children = [lookup, generator]
Example #37
class LookupFeedback(AbstractFeedback, Initializable):
    """A feedback brick for the case when readout are integers.

    Stores and retrieves distributed representations of integers.

    """
    def __init__(self, num_outputs=None, feedback_dim=None, **kwargs):
        self.num_outputs = num_outputs
        self.feedback_dim = feedback_dim

        self.lookup = LookupTable(num_outputs, feedback_dim)
        children = [self.lookup]
        kwargs.setdefault('children', []).extend(children)
        super(LookupFeedback, self).__init__(**kwargs)

    def _push_allocation_config(self):
        self.lookup.length = self.num_outputs
        self.lookup.dim = self.feedback_dim

    @application
    def feedback(self, outputs):
        assert self.output_dim == 0
        return self.lookup.apply(outputs)

    def get_dim(self, name):
        if name == 'feedback':
            return self.feedback_dim
        return super(LookupFeedback, self).get_dim(name)
Example #38
	def __init__(self, dimen, vocab_size): #{
		# No idea what this is doing, but otherwise "allocated" is not set
		super(MorphGen, self).__init__(self)

		# The encoder 
		encoder = Bidirectional(SimpleRecurrent(dim=dimen, activation=Tanh()))

		# What is this doing ? 
		fork = Fork([name for name in encoder.prototype.apply.sequences if name != 'mask'])
		fork.input_dim = dimen
		fork.output_dims = [encoder.prototype.get_dim(name) for name in fork.input_names]

		lookup = LookupTable(vocab_size, dimen)

		transition = SimpleRecurrent(dim=dimen, activation=Tanh(), name="transition")

		atten = SequenceContentAttention(state_names=transition.apply.states,attended_dim=2*dimen, match_dim=dimen, name="attention")

		readout = Readout(
			readout_dim=vocab_size,
			source_names=[transition.apply.states[0],
			atten.take_glimpses.outputs[0]],
			emitter=SoftmaxEmitter(name="emitter"),
			feedback_brick=LookupFeedback(vocab_size, dimen),
			name="readout");

		generator = SequenceGenerator(readout=readout, transition=transition, attention=atten,name="generator")
	
		self.lookup = lookup
		self.fork = fork
		self.encoder = encoder
		self.generator = generator
		self.children = [lookup, fork, encoder, generator]
Example #39
    def __init__(self,
                 num_input_words,
                 emb_dim,
                 dim,
                 vocab,
                 lookup=None,
                 translate=True,
                 normalize=True,
                 **kwargs):

        if num_input_words > 0:
            logger.info("Restricting def vocab to " + str(num_input_words))
            self._num_input_words = num_input_words
        else:
            self._num_input_words = vocab.size()

        self._vocab = vocab
        self._translate = translate
        self._normalize = normalize

        children = []

        if lookup is None:
            logger.info("emb_dim={}".format(emb_dim))
            self._def_lookup = LookupTable(self._num_input_words,
                                           emb_dim,
                                           name='def_lookup')
        else:
            self._def_lookup = lookup

        # Makes sense for a shared lookup: then we precondition the embeddings.
        # Doesn't make sense otherwise (WH = W')
        # TODO(kudkudak): Refactor redundant translate parameter
        if self._translate:
            if emb_dim == dim:
                raise Exception("Redundant layer")

            self._def_translate = Linear(emb_dim, dim, name='def_translate')
            children.extend([self._def_translate])
        else:
            if emb_dim != dim:
                raise Exception("Please pass translate=True if emb_dim != dim")

        children.append(self._def_lookup)

        super(MeanPoolReadDefinitions, self).__init__(children=children,
                                                      **kwargs)
class CompositionalLayerToyWithTables(Initializable):
    def __init__(self, batch_size, num_subwords, num_words, subword_embedding_size, input_vocab_size,
                 subword_RNN_hidden_state_size, **kwargs):

        super(CompositionalLayerToyWithTables, self).__init__(**kwargs)

        self.batch_size = batch_size
        self.num_subwords = num_subwords # number of subwords which make up a word
        self.num_words = num_words  # number of words in the sentence
        self.subword_embedding_size = subword_embedding_size
        self.input_vocab_size = input_vocab_size
        self.subword_RNN_hidden_state_size = subword_RNN_hidden_state_size

        # create the look up table
        self.lookup = LookupTable(length=self.input_vocab_size, dim=self.subword_embedding_size, name='input_lookup')
        self.lookup.weights_init = Uniform(width=0.08)
        self.lookup.biases_init = Constant(0)

        # has one RNN which reads the subwords into a word embedding
        self.compositional_subword_to_word_RNN = SimpleRecurrent(
            dim=self.subword_RNN_hidden_state_size, activation=Identity(), name='subword_RNN',
            weights_init=Identity_init())

        self.children = [self.lookup, self.compositional_subword_to_word_RNN]


    '''
    subword_id_input_ is a 3d tensor with the dimensions of shape = (num_words, num_subwords, batch_size).
    It is expected as a dtype=uint16 or equivalent

    subword_id_input_mask_ is a 3d tensor with the dimensions of shape = (num_words, num_subwords, batch_size).
    It is expected as a dtype=uint8 or equivalent and has binary values of 1 when there is data and zero otherwise.

    The look up table will return a 4d tensor with shape = (num_words, num_subwords, batch_size, embedding size)

    The RNN will eat up the subwords dimension, resulting in a
    3d tensor of shape = (num_words, batch_size, RNN_hidden_value_size), which is returned as 'word_embeddings'

    Also returned is a 2d tensor of shape = (num_words, batch_size), which is the remaining mask indicating
    the length of the sentence for each sentence in the batch, i.e., 1 when there is a word, 0 otherwise.
    '''
    @application(inputs=['subword_id_input_', 'subword_id_input_mask_'], outputs=['word_embeddings', 'word_embeddings_mask'])
    def apply(self, subword_id_input_, subword_id_input_mask_):
        ##shape = (num_words, num_subwords, batch_size, embedding size)
        subword_embeddings = self.lookup.apply(subword_id_input_)

        result, updates = theano.scan( #loop over each word and have the rnn eat up the subwords
            fn=lambda subword_embeddings, subword_id_input_mask_: self.compositional_subword_to_word_RNN.apply(subword_embeddings, mask=subword_id_input_mask_),
            sequences= [subword_embeddings, subword_id_input_mask_])

        # move the subword (RNN time) axis to the front so the final state can be selected
        word_embeddings = result.dimshuffle(1, 0, 2, 3)
        # keep only the last RNN state per word; remove this line to see all the states
        word_embeddings = word_embeddings[-1]

        # remove the subword dim from the mask:
        # a word counts as present if any of its subwords is present, empty otherwise
        word_embeddings_mask = subword_id_input_mask_.max(axis=1)

        return word_embeddings, word_embeddings_mask
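
A hedged sketch of how the apply method above might be driven (symbolic only; it assumes the project's Identity_init and Identity helpers used in the constructor are importable, and the sizes are illustrative):

from theano import tensor

subword_ids = tensor.ltensor3('subword_ids')      # (num_words, num_subwords, batch_size)
subword_mask = tensor.tensor3('subword_mask')     # same shape; 1 where a subword exists

layer = CompositionalLayerToyWithTables(
    batch_size=32, num_subwords=5, num_words=20,
    subword_embedding_size=50, input_vocab_size=1000,
    subword_RNN_hidden_state_size=50)
layer.initialize()

# word_embeddings: (num_words, batch_size, hidden); word_mask: (num_words, batch_size)
word_embeddings, word_mask = layer.apply(subword_ids, subword_mask)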
    def __init__(self, num_outputs=None, feedback_dim=None, **kwargs):
        super(LookupFeedback, self).__init__(**kwargs)
        self.num_outputs = num_outputs
        self.feedback_dim = feedback_dim

        self.lookup = LookupTable(num_outputs, feedback_dim,
                                  weights_init=self.weights_init)
        self.children = [self.lookup]
Example #42
    def __init__(self, vocab_size, embedding_dim, state_dim, **kwargs):
        super(Encoder, self).__init__(**kwargs)
        self.vocab_size = vocab_size
        self.embedding_dim = embedding_dim
        self.state_dim = state_dim

        self.lookup = LookupTable(name='embeddings')
        self.GRU = GatedRecurrent(activation=Tanh(), dim=state_dim)
        self.children = [self.lookup, self.GRU]
Example #43
    def __init__(self, dim, **kwargs):
        super(LookupBottom, self).__init__(**kwargs)
        self.dim = dim

        self.mask = tensor.matrix('inputs_mask')
        self.batch_inputs = {'inputs': tensor.lmatrix('inputs')}
        self.single_inputs = {'inputs': tensor.lvector('inputs')}

        self.children = [LookupTable(self.input_num_chars['inputs'], self.dim)]
Example #44
def nn_fprop(x,
             x_mask,
             y,
             y_mask,
             lens,
             vocab_size,
             hidden_size,
             num_layers,
             model,
             boosting=False,
             **kwargs):
    lookup = LookupTable(length=vocab_size, dim=hidden_size)
    initialize([lookup])
    h = lookup.apply(x)
    first = True
    for i in range(num_layers):
        if model == 'rnn':
            h = rnn_layer(hidden_size,
                          h,
                          i,
                          x_mask=x_mask,
                          first=first,
                          **kwargs)
        elif model == 'gru':
            h = gru_layer(hidden_size,
                          h,
                          i,
                          x_mask=x_mask,
                          first=first,
                          **kwargs)
        elif model == 'lstm':
            h = lstm_layer(hidden_size,
                           h,
                           i,
                           x_mask=x_mask,
                           first=first,
                           **kwargs)
        else:
            print("models must either be rnn or lstm")
            sys.exit(0)
        first = False

    return softmax_layer(h, y, x_mask, y_mask, lens, vocab_size, hidden_size,
                         boosting)
Example #45
File: encoder.py  Project: ucam-smt/sgnmt
    def __init__(self, 
                 vocab_size, 
                 embedding_dim, 
                 n_layers, 
                 skip_connections, 
                 state_dim, 
                 **kwargs):
        """Sole constructor.
        
        Args:
            vocab_size (int): Source vocabulary size
            embedding_dim (int): Dimension of the embedding layer
            n_layers (int): Number of layers. Layers share the same
                            weight matrices.
            skip_connections (bool): Skip connections connect the
                                     source word embeddings directly 
                                     with deeper layers to propagate 
                                     the gradient more efficiently
            state_dim (int): Number of hidden units in the recurrent
                             layers.
        """
        super(BidirectionalEncoder, self).__init__(**kwargs)
        self.vocab_size = vocab_size
        self.embedding_dim = embedding_dim
        self.n_layers = n_layers
        self.state_dim = state_dim
        self.skip_connections = skip_connections

        self.lookup = LookupTable(name='embeddings')
        if self.n_layers >= 1:
            self.bidir = BidirectionalWMT15(
                GatedRecurrent(activation=Tanh(), dim=state_dim))
            self.fwd_fork = Fork(
                [name for name in self.bidir.prototype.apply.sequences
                 if name != 'mask'], prototype=Linear(), name='fwd_fork')
            self.back_fork = Fork(
                [name for name in self.bidir.prototype.apply.sequences
                 if name != 'mask'], prototype=Linear(), name='back_fork')
            self.children = [self.lookup, self.bidir,
                             self.fwd_fork, self.back_fork]
            if self.n_layers > 1: # Deep encoder
                self.mid_fwd_fork = Fork(
                    [name for name in self.bidir.prototype.apply.sequences
                     if name != 'mask'], prototype=Linear(), name='mid_fwd_fork')
                self.mid_back_fork = Fork(
                    [name for name in self.bidir.prototype.apply.sequences
                     if name != 'mask'], prototype=Linear(), name='mid_back_fork')
                self.children.append(self.mid_fwd_fork)
                self.children.append(self.mid_back_fork)
        elif self.n_layers == 0:
            self.embedding_dim = state_dim*2
            self.children = [self.lookup]
        else:
            logging.fatal("Number of encoder layers must be non-negative")
Example #46
File: model_encdec.py  Project: rizar/NMT
    def __init__(self, vocab_size, embedding_dim, state_dim, reverse=True,
                 **kwargs):
        super(Encoder, self).__init__(**kwargs)
        self.vocab_size = vocab_size
        self.embedding_dim = embedding_dim
        self.state_dim = state_dim
        self.reverse = reverse

        self.lookup = LookupTable(name='embeddings')
        self.transition = GatedRecurrent(Tanh(), name='encoder_transition')
        self.fork = Fork([name for name in self.transition.apply.sequences
                          if name != 'mask'], prototype=Linear())

        self.children = [self.lookup, self.transition, self.fork]
Example #47
File: model.py  Project: rizar/NMT
    def __init__(self, vocab_size, embedding_dim, state_dim, **kwargs):
        super(BidirectionalEncoder, self).__init__(**kwargs)
        self.vocab_size = vocab_size
        self.embedding_dim = embedding_dim
        self.state_dim = state_dim

        self.lookup = LookupTable(name='embeddings')
        self.bidir = BidirectionalWMT15(
            GatedRecurrent(activation=Tanh(), dim=state_dim))
        self.fwd_fork = Fork(
            [name for name in self.bidir.prototype.apply.sequences
             if name != 'mask'], prototype=Linear(), name='fwd_fork')
        self.back_fork = Fork(
            [name for name in self.bidir.prototype.apply.sequences
             if name != 'mask'], prototype=Linear(), name='back_fork')

        self.children = [self.lookup, self.bidir, self.fwd_fork, self.back_fork]
Example #48
File: model.py  Project: MtMoon/PoemProject
    def __init__(self, blockid, vocab_size, embedding_dim, state_dim, **kwargs):
        super(Encoder, self).__init__(**kwargs)
        self.vocab_size = vocab_size
        self.embedding_dim = embedding_dim
        self.state_dim = state_dim
        self.blockid = blockid

        self.lookup = LookupTable(name='embeddings' + '_' + self.blockid)
        self.gru = GatedRecurrent(activation=Tanh(), dim=state_dim,
                                  name="GatedRNN" + self.blockid)
        self.fwd_fork = Fork(
            [name for name in self.gru.apply.sequences
             if name != 'mask'], prototype=Linear(), name='fwd_fork' + '_' + self.blockid)

        self.children = [self.lookup, self.gru, self.fwd_fork]
Example #49
class BidirectionalEncoder(Initializable):
    """Encoder of RNNsearch model."""

    def __init__(self, vocab_size, embedding_dim, state_dim, **kwargs):
        super(BidirectionalEncoder, self).__init__(**kwargs)
        self.vocab_size = vocab_size
        self.embedding_dim = embedding_dim
        self.state_dim = state_dim

        self.lookup = LookupTable(name='embeddings')
        self.bidir = BidirectionalWMT15(
            GatedRecurrent(activation=Tanh(), dim=state_dim))
        self.fwd_fork = Fork(
            [name for name in self.bidir.prototype.apply.sequences
             if name != 'mask'], prototype=Linear(), name='fwd_fork')
        self.back_fork = Fork(
            [name for name in self.bidir.prototype.apply.sequences
             if name != 'mask'], prototype=Linear(), name='back_fork')

        self.children = [self.lookup, self.bidir,
                         self.fwd_fork, self.back_fork]

    def _push_allocation_config(self):
        self.lookup.length = self.vocab_size
        self.lookup.dim = self.embedding_dim

        self.fwd_fork.input_dim = self.embedding_dim
        self.fwd_fork.output_dims = [self.bidir.children[0].get_dim(name)
                                     for name in self.fwd_fork.output_names]
        self.back_fork.input_dim = self.embedding_dim
        self.back_fork.output_dims = [self.bidir.children[1].get_dim(name)
                                      for name in self.back_fork.output_names]

    @application(inputs=['source_sentence', 'source_sentence_mask'],
                 outputs=['representation'])
    def apply(self, source_sentence, source_sentence_mask):
        # Time as first dimension
        source_sentence = source_sentence.T
        source_sentence_mask = source_sentence_mask.T

        embeddings = self.lookup.apply(source_sentence)

        representation = self.bidir.apply(
            merge(self.fwd_fork.apply(embeddings, as_dict=True),
                  {'mask': source_sentence_mask}),
            merge(self.back_fork.apply(embeddings, as_dict=True),
                  {'mask': source_sentence_mask})
        )
        return representation
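A hedged usage sketch for the RNNsearch encoder above. The dimensions are placeholders; real configurations typically also override the recurrent transition initialization (e.g. orthogonal), which is omitted here for brevity.

from theano import tensor
from blocks.initialization import IsotropicGaussian, Constant

encoder = BidirectionalEncoder(vocab_size=30000, embedding_dim=620, state_dim=1000)
encoder.weights_init = IsotropicGaussian(0.01)
encoder.biases_init = Constant(0)
encoder.initialize()

source_sentence = tensor.lmatrix('source_sentence')           # (batch, time) word ids
source_sentence_mask = tensor.matrix('source_sentence_mask')  # (batch, time) 0/1 mask
representation = encoder.apply(source_sentence, source_sentence_mask)
# representation: (time, batch, 2 * state_dim) -- forward and backward states concatenated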
Example #50
def construct_model(vocab_size, embedding_dim, ngram_order, hidden_dims,
                    activations):
    # Construct the model
    x = tensor.lmatrix('features')
    y = tensor.lvector('targets')

    lookup = LookupTable(length=vocab_size, dim=embedding_dim, name='lookup')
    hidden = MLP(activations=activations + [None],
                 dims=[ngram_order * embedding_dim] + hidden_dims +
                 [vocab_size])

    embeddings = lookup.apply(x)
    embeddings = embeddings.flatten(ndim=2)  # Concatenate embeddings
    logits = hidden.apply(embeddings)  # renamed to avoid shadowing the `activations` argument
    cost = Softmax().categorical_cross_entropy(y, logits)

    # Initialize parameters
    lookup.weights_init = IsotropicGaussian(0.001)
    hidden.weights_init = IsotropicGaussian(0.01)
    hidden.biases_init = Constant(0.001)
    lookup.initialize()
    hidden.initialize()

    return cost
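A hedged sketch of wiring the returned cost into a Blocks training algorithm; the hyperparameters are placeholders and the data stream is left out.

from blocks.bricks import Tanh
from blocks.graph import ComputationGraph
from blocks.algorithms import GradientDescent, Scale

cost = construct_model(vocab_size=10000, embedding_dim=100, ngram_order=5,
                       hidden_dims=[256], activations=[Tanh()])
cg = ComputationGraph(cost)
# Older Blocks releases use `params=` instead of `parameters=`.
algorithm = GradientDescent(cost=cost, parameters=cg.parameters,
                            step_rule=Scale(learning_rate=0.01))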
Example #51
    def __init__(self, input1_size, input2_size, lookup1_dim=200, lookup2_dim=200, hidden_size=512):
        self.hidden_size = hidden_size
        self.input1_size = input1_size
        self.input2_size = input2_size
        self.lookup1_dim = lookup1_dim
        self.lookup2_dim = lookup2_dim

        x1 = tensor.lmatrix('durations')
        x2 = tensor.lmatrix('syllables')
        y = tensor.lmatrix('pitches')

        lookup1 = LookupTable(dim=self.lookup1_dim, length=self.input1_size, name='lookup1',
                              weights_init=initialization.Uniform(width=0.01),
                              biases_init=Constant(0))
        lookup1.initialize()
        lookup2 = LookupTable(dim=self.lookup2_dim, length=self.input2_size, name='lookup2',
                              weights_init=initialization.Uniform(width=0.01),
                              biases_init=Constant(0))
        lookup2.initialize()
        # The LSTM brick expects its `inputs` to be the concatenated gate
        # pre-activations, i.e. of size 4 * hidden_size.
        merge = Merge(['lookup1', 'lookup2'], [self.lookup1_dim, self.lookup2_dim],
                      self.hidden_size * 4,
                      weights_init=initialization.Uniform(width=0.01),
                      biases_init=Constant(0))
        merge.initialize()
        recurrent_block = LSTM(dim=self.hidden_size, activation=Tanh(),
                               weights_init=initialization.Uniform(width=0.01))
        # Alternative: RecurrentStack([LSTM(dim=self.hidden_size, activation=Tanh())] * 3)
        recurrent_block.initialize()
        linear = Linear(input_dim=self.hidden_size, output_dim=self.input1_size,
                              weights_init=initialization.Uniform(width=0.01),
                              biases_init=Constant(0))
        linear.initialize()
        softmax = NDimensionalSoftmax()

        l1 = lookup1.apply(x1)
        l2 = lookup2.apply(x2)
        m = merge.apply(l1, l2)
        # LSTM.apply returns (states, cells); keep the hidden states for the output layer.
        # Note that Blocks recurrent bricks scan over axis 0, so the merged input
        # is expected to be time-major: (time, batch, features).
        h, c = recurrent_block.apply(m)
        a = linear.apply(h)

        y_hat = softmax.apply(a, extra_ndim=1)
        # Note: applying a plain softmax to the 3-D activations raises
        # "ValueError: x must be 1-d or 2-d tensor of floats. Got TensorType(float64, 3D)",
        # which is why NDimensionalSoftmax with extra_ndim=1 is used here.

        self.Cost = softmax.categorical_cross_entropy(y, a, extra_ndim=1).mean()

        self.ComputationGraph = ComputationGraph(self.Cost)

        self.Model = Model(y_hat)
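The constructor above only builds the graph; below is a hedged sketch of compiling the cost from the stored ComputationGraph. PitchModel is a hypothetical name for the enclosing class, which this excerpt does not show, and the vocabulary sizes are assumed.

import theano

model = PitchModel(input1_size=30, input2_size=60)   # hypothetical class name and sizes
inputs = {v.name: v for v in model.ComputationGraph.inputs}
cost_fn = theano.function([inputs['durations'], inputs['syllables'], inputs['pitches']],
                          model.Cost)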
Example #52
File: encoder.py  Project: ucam-smt/sgnmt
    def __init__(self,
                 vocab_size,
                 embedding_dim,
                 n_layers,
                 skip_connections,
                 state_dim,
                 **kwargs):
        """Sole constructor.

        Args:
            vocab_size (int): Source vocabulary size
            embedding_dim (int): Dimension of the embedding layer
            n_layers (int): Number of layers. Layers share the same
                            weight matrices.
            skip_connections (bool): Skip connections connect the
                                     source word embeddings directly
                                     with deeper layers to propagate
                                     the gradient more efficiently
            state_dim (int): Number of hidden units in the recurrent
                             layers.
        """
        super(DeepBidirectionalEncoder, self).__init__(**kwargs)
        self.vocab_size = vocab_size
        self.embedding_dim = embedding_dim
        self.n_layers = n_layers
        self.state_dim = state_dim
        self.skip_connections = skip_connections
        self.lookup = LookupTable(name='embeddings')
        self.bidirs = []
        self.fwd_forks = []
        self.back_forks = []
        for i in xrange(self.n_layers):
            bidir = BidirectionalWMT15(
                GatedRecurrent(activation=Tanh(), dim=state_dim),
                name='bidir%d' % i)
            self.bidirs.append(bidir)
            self.fwd_forks.append(Fork(
                [name for name in bidir.prototype.apply.sequences
                 if name != 'mask'],
                prototype=Linear(), name='fwd_fork%d' % i))
            self.back_forks.append(Fork(
                [name for name in bidir.prototype.apply.sequences
                 if name != 'mask'],
                prototype=Linear(), name='back_fork%d' % i))
        self.children = [self.lookup] \
                        + self.bidirs \
                        + self.fwd_forks \
                        + self.back_forks
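A small hedged sketch of what the constructor above creates: one shared lookup table plus per-layer bidirectional transitions and forks with indexed names (the hyperparameter values are illustrative).

encoder = DeepBidirectionalEncoder(vocab_size=30000, embedding_dim=620,
                                   n_layers=2, skip_connections=True,
                                   state_dim=1000)
print([child.name for child in encoder.children])
# ['embeddings', 'bidir0', 'bidir1', 'fwd_fork0', 'fwd_fork1', 'back_fork0', 'back_fork1']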
Example #53
File: model.py  Project: sotelo/parrot
class Encoder(Initializable):
    def __init__(
            self,
            encoder_type,
            num_characters,
            input_dim,
            encoder_dim,
            **kwargs):
        assert encoder_type in [None, 'bidirectional']
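        # Note: this assertion admits only None and 'bidirectional'; the 'lookup'
        # branches checked below are therefore unreachable in the snippet as given.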
        self.encoder_type = encoder_type
        super(Encoder, self).__init__(**kwargs)

        self.children = []

        if encoder_type in ['lookup', 'bidirectional']:
            self.embed_label = LookupTable(
                num_characters,
                input_dim,
                name='embed_label')
            self.children += [
                self.embed_label]
        else:
            # If there is no encoder.
            assert num_characters == input_dim

        if encoder_type == 'bidirectional':
            transition = RecurrentWithFork(
                GatedRecurrent(dim=encoder_dim).apply,
                input_dim, name='encoder_transition')
            self.encoder = Bidirectional(transition, name='encoder')
            self.children.append(self.encoder)

    @application
    def apply(self, x, x_mask=None):
        if self.encoder_type is None:
            return x

        if self.encoder_type in ['lookup', 'bidirectional']:
            embed_x = self.embed_label.apply(x)

        if self.encoder_type == 'lookup':
            encoded_x = embed_x

        if self.encoder_type == 'bidirectional':
            encoded_x = self.encoder.apply(embed_x, x_mask)

        return encoded_x
Example #54
File: model_encdec.py  Project: rizar/NMT
class Encoder(Initializable):
    def __init__(self, vocab_size, embedding_dim, state_dim, reverse=True,
                 **kwargs):
        super(Encoder, self).__init__(**kwargs)
        self.vocab_size = vocab_size
        self.embedding_dim = embedding_dim
        self.state_dim = state_dim
        self.reverse = reverse

        self.lookup = LookupTable(name='embeddings')
        self.transition = GatedRecurrent(Tanh(), name='encoder_transition')
        self.fork = Fork([name for name in self.transition.apply.sequences
                          if name != 'mask'], prototype=Linear())

        self.children = [self.lookup, self.transition, self.fork]

    def _push_allocation_config(self):
        self.lookup.length = self.vocab_size
        self.lookup.dim = self.embedding_dim
        self.transition.dim = self.state_dim
        self.fork.input_dim = self.embedding_dim
        self.fork.output_dims = [self.state_dim
                                 for _ in self.fork.output_names]

    @application(inputs=['source_sentence', 'source_sentence_mask'],
                 outputs=['representation'])
    def apply(self, source_sentence, source_sentence_mask):
        # Time as first dimension
        source_sentence = source_sentence.dimshuffle(1, 0)
        source_sentence_mask = source_sentence_mask.T
        if self.reverse:
            source_sentence = source_sentence[::-1]
            source_sentence_mask = source_sentence_mask[::-1]

        embeddings = self.lookup.apply(source_sentence)
        representation = self.transition.apply(**merge(
            self.fork.apply(embeddings, as_dict=True),
            {'mask': source_sentence_mask}
        ))
        return representation[-1]
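A hedged usage sketch for the unidirectional encoder above (dimensions assumed, and assuming the Blocks API contemporary with this snippet, where GatedRecurrent takes the activation as its first argument). With reverse=True the source is read right-to-left and apply() returns only the final hidden state.

from theano import tensor
from blocks.initialization import IsotropicGaussian, Constant

encoder = Encoder(vocab_size=30000, embedding_dim=620, state_dim=1000)
encoder.weights_init = IsotropicGaussian(0.01)
encoder.biases_init = Constant(0)
encoder.initialize()

source_sentence = tensor.lmatrix('source_sentence')
source_sentence_mask = tensor.matrix('source_sentence_mask')
final_state = encoder.apply(source_sentence, source_sentence_mask)
# final_state: (batch, state_dim) -- the last recurrent state of the reversed sequence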