Example #1
    def __init__(self, dim, mini_dim, summary_dim, **kwargs):
        super(LSTMwMini, self).__init__(**kwargs)
        self.dim = dim
        self.mini_dim = mini_dim
        self.summary_dim = summary_dim

        self.recurrent_layer = LSTM(dim=self.summary_dim,
                                    activation=Rectifier(),
                                    name='recurrent_layer',
                                    weights_init=IsotropicGaussian(),
                                    biases_init=Constant(0.0))
        self.mini_recurrent_layer = LSTM(dim=self.mini_dim,
                                         activation=Rectifier(),
                                         name='mini_recurrent_layer',
                                         weights_init=IsotropicGaussian(),
                                         biases_init=Constant(0.0))

        self.mini_to_main = Linear(self.dim + self.mini_dim,
                                   self.summary_dim,
                                   name='mini_to_main',
                                   weights_init=IsotropicGaussian(),
                                   biases_init=Constant(0.0))
        self.mini_to_main2 = Linear(self.summary_dim,
                                    self.summary_dim * 4,
                                    name='mini_to_main2',
                                    weights_init=IsotropicGaussian(),
                                    biases_init=Constant(0.0))

        self.children = [
            self.recurrent_layer, self.mini_recurrent_layer, self.mini_to_main,
            self.mini_to_main2
        ]
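A pattern that recurs throughout these examples is a Linear brick with output_dim = 4 * dim feeding an LSTM(dim): the Blocks LSTM brick expects its input already projected into the four stacked gate blocks (input, forget, cell, output). Below is a minimal, illustrative sketch of that wiring; it is not taken from any example on this page, and the dimensions and brick names are placeholders.

import numpy
import theano
from theano import tensor
from blocks.bricks import Linear, Tanh
from blocks.bricks.recurrent import LSTM
from blocks.initialization import IsotropicGaussian, Constant

# Input is (time, batch, features); the Linear projects features -> 4 * dim
# so the LSTM can slice the result into its four gate pre-activations.
x = tensor.tensor3('x')
x_to_gates = Linear(input_dim=6, output_dim=4 * 8, name='x_to_gates',
                    weights_init=IsotropicGaussian(0.01), biases_init=Constant(0))
lstm = LSTM(dim=8, activation=Tanh(), name='lstm',
            weights_init=IsotropicGaussian(0.01), biases_init=Constant(0))
x_to_gates.initialize()
lstm.initialize()

hidden, cells = lstm.apply(x_to_gates.apply(x))
f = theano.function([x], hidden)
print(f(numpy.ones((5, 2, 6), dtype=theano.config.floatX)).shape)  # (5, 2, 8)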
Example #2
    def __init__(self, feature_dim, memory_dim, fc1_dim, fc2_dim):
        self.W = Linear(input_dim=feature_dim,
                        output_dim=memory_dim * 4,
                        weights_init=IsotropicGaussian(0.01),
                        biases_init=Constant(0),
                        use_bias=False,
                        name='seqDecoder_W')
        self.GRU_A = LSTM(feature_dim,
                          name='seqDecoder_A',
                          weights_init=IsotropicGaussian(0.01),
                          biases_init=Constant(0))
        self.GRU_B = LSTM(memory_dim,
                          name='seqDecoder_B',
                          weights_init=IsotropicGaussian(0.01),
                          biases_init=Constant(0))
        self.W.initialize()
        self.GRU_A.initialize()
        self.GRU_B.initialize()
        self.fc1 = Linear(input_dim=memory_dim,
                          output_dim=fc1_dim,
                          weights_init=IsotropicGaussian(0.01),
                          biases_init=Constant(0),
                          name='fc1')
        self.fc2 = Linear(input_dim=fc1_dim,
                          output_dim=fc2_dim,
                          weights_init=IsotropicGaussian(0.01),
                          biases_init=Constant(0),
                          name='fc2')

        self.fc1.initialize()
        self.fc2.initialize()
Example #3
    def __init__(self, emb_dim, dim, num_input_words, 
                 num_output_words, vocab, 
                 **kwargs):
        if emb_dim == 0:
            emb_dim = dim
        if num_input_words == 0:
            num_input_words = vocab.size()
        if num_output_words == 0:
            num_output_words = vocab.size()

        self._num_input_words = num_input_words
        self._num_output_words = num_output_words
        self._vocab = vocab

        self._word_to_id = WordToIdOp(self._vocab)

        children = []

        self._main_lookup = LookupTable(self._num_input_words, emb_dim, name='main_lookup')
        self._encoder_fork = Linear(emb_dim, 4 * dim, name='encoder_fork')
        self._encoder_rnn = LSTM(dim, name='encoder_rnn')
        self._decoder_fork = Linear(emb_dim, 4 * dim, name='decoder_fork')
        self._decoder_rnn = LSTM(dim, name='decoder_rnn')
        children.extend([self._main_lookup,
                         self._encoder_fork, self._encoder_rnn,
                         self._decoder_fork, self._decoder_rnn])
        self._pre_softmax = Linear(dim, self._num_output_words)
        self._softmax = NDimensionalSoftmax()
        children.extend([self._pre_softmax, self._softmax])

        super(LanguageModel, self).__init__(children=children, **kwargs)
Example #4
def make_bidir_lstm_stack(seq, seq_dim, mask, sizes, skip=True, name=''):
    bricks = []

    curr_dim = [seq_dim]
    curr_hidden = [seq]

    hidden_list = []
    for k, dim in enumerate(sizes):
        fwd_lstm_ins = [Linear(input_dim=d, output_dim=4*dim, name='%s_fwd_lstm_in_%d_%d'%(name,k,l)) for l, d in enumerate(curr_dim)]
        fwd_lstm = LSTM(dim=dim, activation=Tanh(), name='%s_fwd_lstm_%d'%(name,k))

        bwd_lstm_ins = [Linear(input_dim=d, output_dim=4*dim, name='%s_bwd_lstm_in_%d_%d'%(name,k,l)) for l, d in enumerate(curr_dim)]
        bwd_lstm = LSTM(dim=dim, activation=Tanh(), name='%s_bwd_lstm_%d'%(name,k))

        bricks = bricks + [fwd_lstm, bwd_lstm] + fwd_lstm_ins + bwd_lstm_ins

        fwd_tmp = sum(x.apply(v) for x, v in zip(fwd_lstm_ins, curr_hidden))
        bwd_tmp = sum(x.apply(v) for x, v in zip(bwd_lstm_ins, curr_hidden))
        fwd_hidden, _ = fwd_lstm.apply(fwd_tmp, mask=mask)
        bwd_hidden, _ = bwd_lstm.apply(bwd_tmp[::-1], mask=mask[::-1])
        hidden_list = hidden_list + [fwd_hidden, bwd_hidden]
        if skip:
            curr_hidden = [seq, fwd_hidden, bwd_hidden[::-1]]
            curr_dim = [seq_dim, dim, dim]
        else:
            curr_hidden = [fwd_hidden, bwd_hidden[::-1]]
            curr_dim = [dim, dim]

    return bricks, hidden_list
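For reference, a call to the helper above might look like the sketch below; the tensor names, sizes, and initialization scheme are placeholders rather than values from the original project.

from theano import tensor
from blocks.initialization import IsotropicGaussian, Constant

seq = tensor.tensor3('seq')    # (time, batch, features)
mask = tensor.matrix('mask')   # (time, batch)
bricks, hiddens = make_bidir_lstm_stack(seq, 100, mask, sizes=[64, 64], skip=True, name='ctx')
for brick in bricks:
    brick.weights_init = IsotropicGaussian(0.01)
    brick.biases_init = Constant(0.)
    brick.initialize()
# hiddens alternates forward/backward states per layer; the last pair gives the
# top-level bidirectional representation.
top = tensor.concatenate([hiddens[-2], hiddens[-1][::-1]], axis=2)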
Example #5
    def __init__(self,
                 input_dim,
                 output_dim,
                 lstm_dim,
                 print_intermediate=False,
                 print_attrs=['__str__'],
                 **kwargs):
        super(LinearLSTM, self).__init__(**kwargs)

        self.x_to_h = Linear(input_dim,
                             lstm_dim * 4,
                             name='x_to_h',
                             weights_init=IsotropicGaussian(),
                             biases_init=Constant(0.0))
        self.lstm = LSTM(lstm_dim,
                         name='lstm',
                         weights_init=IsotropicGaussian(),
                         biases_init=Constant(0.0))
        self.h_to_o = Linear(lstm_dim,
                             output_dim,
                             name='h_to_o',
                             weights_init=IsotropicGaussian(),
                             biases_init=Constant(0.0))

        self.children = [self.x_to_h, self.lstm, self.h_to_o]

        self.print_intermediate = print_intermediate
        self.print_attrs = print_attrs
Example #6
    def create_model(self):
        input_dim = self.input_dim
        x = self.x
        x_to_h = Linear(input_dim,
                        input_dim * 4,
                        name='x_to_h',
                        weights_init=IsotropicGaussian(),
                        biases_init=Constant(0.0))
        lstm = LSTM(input_dim,
                    name='lstm',
                    weights_init=IsotropicGaussian(),
                    biases_init=Constant(0.0))
        h_to_o = Linear(input_dim,
                        1,
                        name='h_to_o',
                        weights_init=IsotropicGaussian(),
                        biases_init=Constant(0.0))

        x_transform = x_to_h.apply(x)
        self.x_to_h = x_to_h
        self.lstm = lstm
        self.h_to_o = h_to_o

        h, c = lstm.apply(x_transform)

        # only values of hidden units of the last timeframe are used for
        # the classification
        probs = h_to_o.apply(h[-1])
        return probs
Example #7
def bilstm_layer(in_dim, inp, h_dim, n):
    linear = Linear(input_dim=in_dim, output_dim=h_dim * 4, name='linear' + str(n)+inp.name)
    lstm = LSTM(dim=h_dim, name='lstm' + str(n)+inp.name)
    bilstm = Bidirectional(prototype=lstm)
    bilstm.name = 'bilstm' + str(n) + inp.name
    initialize([linear, bilstm])
    return bilstm.apply(linear.apply(inp))[0]
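Bidirectional wraps two copies of the LSTM prototype and concatenates their states along the feature axis, so the representation returned above has 2 * h_dim features. An illustrative call (names and sizes are placeholders, and the module's initialize helper is assumed):

from theano import tensor

words = tensor.tensor3('words')                                # (time, batch, 300)
states = bilstm_layer(in_dim=300, inp=words, h_dim=128, n=1)   # (time, batch, 256)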
Example #8
def lstm_layer(in_dim, h, h_dim, n, pref=""):
    linear = Linear(input_dim=in_dim,
                    output_dim=h_dim * 4,
                    name='linear' + str(n) + pref)
    lstm = LSTM(dim=h_dim, name='lstm' + str(n) + pref)
    initialize([linear, lstm])
    return lstm.apply(linear.apply(h))[0]
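Stacking two layers with this helper is then just a matter of feeding one layer's states into the next, as in this small sketch (dimensions are placeholders and the module-level initialize helper is assumed):

from theano import tensor

x = tensor.tensor3('x')                             # (time, batch, 50) input features
h1 = lstm_layer(in_dim=50, h=x, h_dim=128, n=1)     # first LSTM layer
h2 = lstm_layer(in_dim=128, h=h1, h_dim=128, n=2)   # second layer stacked on the first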
Example #9
    def __init__(self, config, **kwargs):
        super(Model, self).__init__(**kwargs)
        self.config = config

        self.pre_context_embedder = ContextEmbedder(
            config.pre_embedder, name='pre_context_embedder')
        self.post_context_embedder = ContextEmbedder(
            config.post_embedder, name='post_context_embedder')

        in1 = 2 + sum(x[2] for x in config.pre_embedder.dim_embeddings)
        self.input_to_rec = MLP(activations=[Tanh()],
                                dims=[in1, config.hidden_state_dim],
                                name='input_to_rec')

        self.rec = LSTM(dim=config.hidden_state_dim, name='recurrent')

        in2 = config.hidden_state_dim + sum(
            x[2] for x in config.post_embedder.dim_embeddings)
        self.rec_to_output = MLP(activations=[Tanh()],
                                 dims=[in2, 2],
                                 name='rec_to_output')

        self.sequences = ['latitude', 'latitude_mask', 'longitude']
        self.context = self.pre_context_embedder.inputs + self.post_context_embedder.inputs
        self.inputs = self.sequences + self.context
        self.children = [
            self.pre_context_embedder, self.post_context_embedder,
            self.input_to_rec, self.rec, self.rec_to_output
        ]

        self.initial_state_ = shared_floatx_zeros((config.hidden_state_dim, ),
                                                  name="initial_state")
        self.initial_cells = shared_floatx_zeros((config.hidden_state_dim, ),
                                                 name="initial_cells")
Example #10
    def apply(self, input_, target):
        x_to_h = Linear(name='x_to_h',
                        input_dim=self.dims[0],
                        output_dim=self.dims[1] * 4)
        pre_rnn = x_to_h.apply(input_)
        pre_rnn.name = 'pre_rnn'
        rnn = LSTM(activation=Tanh(), dim=self.dims[1], name=self.name)
        h, _ = rnn.apply(pre_rnn)
        h.name = 'h'
        h_to_y = Linear(name='h_to_y',
                        input_dim=self.dims[1],
                        output_dim=self.dims[2])
        y_hat = h_to_y.apply(h)
        y_hat.name = 'y_hat'

        cost = SquaredError().apply(target, y_hat)
        cost.name = 'MSE'

        self.outputs = {}
        self.outputs['y_hat'] = y_hat
        self.outputs['cost'] = cost
        self.outputs['pre_rnn'] = pre_rnn
        self.outputs['h'] = h

        # Initialization
        for brick in (rnn, x_to_h, h_to_y):
            brick.weights_init = IsotropicGaussian(0.01)
            brick.biases_init = Constant(0)
            brick.initialize()
Example #11
    def __init__(self, image_shape, patch_shape, hidden_dim,
                 n_spatial_dims, whatwhere_interaction, prefork_area_transform,
                 postmerge_area_transform, patch_transform, batch_normalize,
                 response_transform, location_std, scale_std, cutoff,
                 batched_window, initargs, emitter, **kwargs):
        self.rnn = LSTM(activation=Tanh(),
                        dim=hidden_dim,
                        name="recurrent",
                        weights_init=IsotropicGaussian(1e-4),
                        biases_init=Constant(0))
        self.locator = masonry.Locator(hidden_dim, n_spatial_dims,
                                       area_transform=prefork_area_transform,
                                       location_std=location_std,
                                       scale_std=scale_std,
                                       **initargs)
        self.cropper = crop.LocallySoftRectangularCropper(
            n_spatial_dims=n_spatial_dims,
            image_shape=image_shape, patch_shape=patch_shape,
            kernel=crop.Gaussian(), cutoff=cutoff,
            batched_window=batched_window)
        self.merger = masonry.Merger(
            patch_transform=patch_transform,
            area_transform=postmerge_area_transform,
            response_transform=response_transform,
            n_spatial_dims=n_spatial_dims,
            whatwhere_interaction=whatwhere_interaction,
            batch_normalize=batch_normalize,
            **initargs)
        self.attention = masonry.SpatialAttention(
            self.locator, self.cropper, self.merger,
            name="sa")
        self.emitter = emitter
        self.model = masonry.RecurrentAttentionModel(
            self.rnn, self.attention, self.emitter,
            name="ram")
Example #12
    def __init__(self,
                 num_input_words,
                 emb_dim,
                 dim,
                 vocab,
                 lookup=None,
                 fork_and_rnn=None,
                 **kwargs):

        if num_input_words > 0:
            logger.info("Restricting def vocab to " + str(num_input_words))
            self._num_input_words = num_input_words
        else:
            self._num_input_words = vocab.size()

        self._vocab = vocab

        children = []

        if lookup is None:
            self._def_lookup = LookupTable(self._num_input_words,
                                           emb_dim,
                                           name='def_lookup')
        else:
            self._def_lookup = lookup

        if fork_and_rnn is None:
            self._def_fork = Linear(emb_dim, 4 * dim, name='def_fork')
            self._def_rnn = LSTM(dim, name='def_rnn')
        else:
            self._def_fork, self._def_rnn = fork_and_rnn

        children.extend([self._def_lookup, self._def_fork, self._def_rnn])

        super(LSTMReadDefinitions, self).__init__(children=children, **kwargs)
Example #13
    def __init__(self, feature_dim, hidden_dim, output_dim):
        self.image_embed = Linear(input_dim=feature_dim,
                                  output_dim=hidden_dim,
                                  weights_init=IsotropicGaussian(0.01),
                                  biases_init=Constant(0),
                                  use_bias=False,
                                  name='image_embed')
        self.word_embed = Linear(input_dim=feature_dim,
                                 output_dim=hidden_dim,
                                 weights_init=IsotropicGaussian(0.01),
                                 biases_init=Constant(0),
                                 use_bias=False,
                                 name='word_embed')
        self.r_embed = Linear(input_dim=feature_dim,
                              output_dim=hidden_dim,
                              weights_init=IsotropicGaussian(0.01),
                              biases_init=Constant(0),
                              use_bias=False,
                              name='r_embed')
        self.m_to_s = Linear(input_dim=hidden_dim,
                             output_dim=1,
                             weights_init=IsotropicGaussian(0.01),
                             biases_init=Constant(0),
                             use_bias=False,
                             name='m_to_s')
        self.attention_dist = Softmax(name='attention_dist_softmax')
        self.r_to_r = Linear(input_dim=feature_dim,
                             output_dim=feature_dim,
                             weights_init=IsotropicGaussian(0.01),
                             biases_init=Constant(0),
                             use_bias=False,
                             name='r_to_r')
        # self.r_to_g = Linear(input_dim=feature_dim,
        #                      output_dim=output_dim,
        #                      weights_init=IsotropicGaussian(0.01),
        #                      biases_init=Constant(0),
        #                      use_bias=False,
        #                      name='r_to_g')
        self.image_embed.initialize()
        self.word_embed.initialize()
        self.r_embed.initialize()
        self.m_to_s.initialize()
        self.r_to_r.initialize()
        # self.r_to_g.initialize()

        # the sequence to sequence LSTM
        self.seq = LSTM(output_dim,
                        name='rewatcher_seq',
                        weights_init=IsotropicGaussian(0.01),
                        biases_init=Constant(0))
        self.seq_embed = Linear(feature_dim,
                                output_dim * 4,
                                name='rewatcher_seq_embed',
                                weights_init=IsotropicGaussian(0.01),
                                biases_init=Constant(0),
                                use_bias=False)

        self.seq.initialize()
        self.seq_embed.initialize()
Example #14
def lstm_layer(in_size, dim, x, h, n, task, first_layer=False):
    if connect_h_to_h == 'all-previous':
        if first_layer:
            lstm_input = x
            linear = Linear(input_dim=in_size,
                            output_dim=dim * 4,
                            name='linear' + str(n) + '-' + str(task))
        elif connect_x_to_h:
            lstm_input = T.concatenate([x] + [hidden for hidden in h], axis=2)
            linear = Linear(input_dim=in_size + dim * (n),
                            output_dim=dim * 4,
                            name='linear' + str(n) + '-' + str(task))
        else:
            lstm_input = T.concatenate([hidden for hidden in h], axis=2)
            linear = Linear(input_dim=dim * (n + 1),
                            output_dim=dim * 4,
                            name='linear' + str(n) + '-' + str(task))
    elif connect_h_to_h == 'two-previous':
        if first_layer:
            lstm_input = x
            linear = Linear(input_dim=in_size,
                            output_dim=dim * 4,
                            name='linear' + str(n) + '-' + str(task))
        elif connect_x_to_h:
            lstm_input = T.concatenate([x] + h[max(0, n - 2):n], axis=2)
            linear = Linear(input_dim=in_size + dim * 2 if n > 1 else in_size +
                            dim,
                            output_dim=dim * 4,
                            name='linear' + str(n) + '-' + str(task))
        else:
            lstm_input = T.concatenate(h[max(0, n - 2):n], axis=2)
            linear = Linear(input_dim=dim * 2 if n > 1 else dim,
                            output_dim=dim * 4,
                            name='linear' + str(n) + '-' + str(task))
    elif connect_h_to_h == 'one-previous':
        if first_layer:
            lstm_input = x
            linear = Linear(input_dim=in_size,
                            output_dim=dim * 4,
                            name='linear' + str(n) + '-' + str(task))
        elif connect_x_to_h:
            lstm_input = T.concatenate([x] + [h[n - 1]], axis=2)
            linear = Linear(input_dim=in_size + dim,
                            output_dim=dim * 4,
                            name='linear' + str(n) + '-' + str(task))
        else:
            lstm_input = h[n - 1]
            linear = Linear(input_dim=dim,
                            output_dim=dim * 4,
                            name='linear' + str(n) + '-' + str(task))
    lstm = LSTM(dim=dim, name=layer_models[n] + str(n) + '-' + str(task))
    initialize([linear, lstm])
    if layer_models[n] == 'lstm':
        return lstm.apply(linear.apply(lstm_input))
    elif layer_models[n] == 'mt_lstm':
        return lstm.apply(linear.apply(lstm_input),
                          time_scale=layer_resolutions[n],
                          time_offset=layer_execution_time_offset[n])
Example #15
def construct_model(activation_function, r_dim, hidden_dim, out_dim):
    # Construct the model
    r = tensor.fmatrix('r')
    x = tensor.fmatrix('x')
    y = tensor.ivector('y')

    nx = x.shape[0]
    nj = x.shape[1]  # also is r.shape[0]
    nr = r.shape[1]

    # r is nj x nr
    # x is nx x nj
    # y is nx

    # Get a representation of r of size r_dim
    r = DAE(r)

    # r is now nj x r_dim

    # r_rep is nx x nj x r_dim
    r_rep = r[None, :, :].repeat(axis=0, repeats=nx)
    # x3 is nx x nj x 1
    x3 = x[:, :, None]

    # concat is nx x nj x (r_dim + 1)
    concat = tensor.concatenate([r_rep, x3], axis=2)

    # Change concat from Batch x Time x Features to T X B x F
    rnn_input = concat.dimshuffle(1, 0, 2)

    linear = Linear(input_dim=r_dim + 1,
                    output_dim=4 * hidden_dim,
                    name="input_linear")
    lstm = LSTM(dim=hidden_dim,
                activation=activation_function,
                name="hidden_recurrent")
    top_linear = Linear(input_dim=hidden_dim,
                        output_dim=out_dim,
                        name="out_linear")

    pre_rnn = linear.apply(rnn_input)
    states = lstm.apply(pre_rnn)[0]
    activations = top_linear.apply(states)
    activations = tensor.mean(activations, axis=0)

    cost = Softmax().categorical_cross_entropy(y, activations)

    pred = activations.argmax(axis=1)
    error_rate = tensor.neq(y, pred).mean()

    # Initialize parameters

    for brick in (linear, lstm, top_linear):
        brick.weights_init = IsotropicGaussian(0.1)
        brick.biases_init = Constant(0.)
        brick.initialize()

    return cost, error_rate
Example #16
def add_lstm(input_dim, input_var):
    linear = Linear(input_dim=input_dim, output_dim=input_dim * 4, name="linear_layer")
    lstm = LSTM(dim=input_dim, name="lstm_layer")

    testing_init(linear)
    #linear.initialize()
    default_init(lstm)

    h = linear.apply(input_var)
    return lstm.apply(h)
Example #17
    def create_model(self):
        input_dim = self.input_dim
        x = self.x
        y = self.y
        p = self.p
        mask = self.mask
        hidden_dim = self.hidden_dim
        embedding_dim = self.embedding_dim
        lookup = LookupTable(self.dict_size,
                             embedding_dim,
                             weights_init=IsotropicGaussian(0.001),
                             name='LookupTable')
        x_to_h = Linear(embedding_dim,
                        hidden_dim * 4,
                        name='x_to_h',
                        weights_init=IsotropicGaussian(0.001),
                        biases_init=Constant(0.0))
        lstm = LSTM(hidden_dim,
                    name='lstm',
                    weights_init=IsotropicGaussian(0.001),
                    biases_init=Constant(0.0))
        h_to_o = MLP([Logistic()], [hidden_dim, 1],
                     weights_init=IsotropicGaussian(0.001),
                     biases_init=Constant(0),
                     name='h_to_o')

        lookup.initialize()
        x_to_h.initialize()
        lstm.initialize()
        h_to_o.initialize()

        embed = lookup.apply(x).reshape(
            (x.shape[0], x.shape[1], self.embedding_dim))
        embed.name = "embed_vec"
        x_transform = x_to_h.apply(embed.transpose(1, 0, 2))
        x_transform.name = "Transformed X"
        self.lookup = lookup
        self.x_to_h = x_to_h
        self.lstm = lstm
        self.h_to_o = h_to_o

        #if mask is None:
        h, c = lstm.apply(x_transform)
        #else:
        #h, c = lstm.apply(x_transform, mask=mask)
        h.name = "hidden_state"
        c.name = "cell state"
        # only values of hidden units of the last timeframe are used for
        # the classification
        indices = T.sum(mask, axis=0) - 1
        rel_hid = h[indices, T.arange(h.shape[1])]
        out = self.h_to_o.apply(rel_hid)

        probs = out
        return probs
Example #18
def lstm_layer(dim, h, n, x_mask, first, **kwargs):
    linear = Linear(input_dim=dim, output_dim=dim * 4, name='linear' + str(n))
    lstm = LSTM(dim=dim, activation=Rectifier(), name='lstm' + str(n))
    initialize([linear, lstm])
    applyLin = linear.apply(h)

    if first:
        lstmApply = lstm.apply(applyLin, mask=x_mask, **kwargs)[0]
    else:
        lstmApply = lstm.apply(applyLin, **kwargs)[0]
    return lstmApply
Example #19
def getBidir2(input_dim, input_var):
    """LSTM-based bidirectional."""
    bidir = Bidirectional(weights_init=Orthogonal(),
                          prototype=LSTM(dim=input_dim, name='lstm'))
    #bidir.allocate()
    bidir.initialize()
    h = bidir.apply(input_var)

    net = add_softmax_layer(h, input_dim, 2)

    return net
Example #20
    def lstm_layer(self, h, n):
        """
        Performs the LSTM update for a batch of word sequences
        :param h: The word embeddings for this update
        :param n: The number of layers of the LSTM
        """
        # Maps the word embedding to a dimensionality to be used in the LSTM
        linear = Linear(input_dim=self.hidden_size, output_dim=self.hidden_size * 4, name='linear_lstm' + str(n))
        initialize(linear, sqrt(6.0 / (5 * self.hidden_size)))
        lstm = LSTM(dim=self.hidden_size, name='lstm' + str(n))
        initialize(lstm, 0.08)
        return lstm.apply(linear.apply(h))
Example #21
    def __init__(self, layers_no, dim, alphabet_size, batch_size):
        # characters -> 1-of-N embedder -> N-to-dim -> LSTM#0 -> ... -> LSTM#(layers_no-1) -> dim-to-N -> softmax
        # TODO: define the error

        # TODO: first_resizer

        # LSTM stack
        self.stack = []
        lstms = [LSTM(dim=dim) for _ in range(layers_no)]
        for lstm in lstms:
            state, cell = lstm.initial_states(batch_size)
            self.stack.append((lstm, state, cell))
Example #22
    def __init__(self, dimension, input_size, embed_input=False, **kwargs):
        super(LSTMEncoder, self).__init__(**kwargs)
        if embed_input:
            self.embedder = LookupTable(input_size, dimension)
        else:
            self.embedder = Linear(input_size, dimension)
        self.fork = Fork(['inputs'],
                         dimension,
                         output_dims=[dimension],
                         prototype=Linear(dimension, 4 * dimension))
        encoder = Bidirectional(LSTM(dim=dimension, activation=Tanh()))

        self.encoder = encoder
        self.children = [encoder, self.embedder, self.fork]
Example #23
def example4():
    """LSTM -> Plante lors de l'initialisation du lstm."""

    x = tensor.tensor3('x')
    dim = 3

    #    gate_inputs = theano.function([x],x*4)
    gate_inputs = Linear(input_dim=dim,
                         output_dim=dim * 4,
                         name="linear",
                         weights_init=initialization.Identity(),
                         biases_init=Constant(2))

    lstm = LSTM(dim=dim,
                activation=Tanh(),
                weights_init=IsotropicGaussian(),
                biases_init=Constant(0))

    gate_inputs.initialize()
    hg = gate_inputs.apply(x)

    #print(gate_inputs.parameters)
    #print(gate_inputs.parameters[1].get_value())

    lstm.initialize()
    h, cells = lstm.apply(hg)
    print(lstm.parameters)

    f = theano.function([x], h)
    print(f(np.ones((dim, 1, dim), dtype=theano.config.floatX)))
    print(f(np.ones((dim, 1, dim), dtype=theano.config.floatX)))
    print(f(4 * np.ones((dim, 1, dim), dtype=theano.config.floatX)))

    print("Good Job!")

    #    lstm_output =

    #Initial State
    h0 = tensor.matrix('h0')
    c = tensor.matrix('cells')
    h, c1 = lstm.apply(
        inputs=x, states=h0,
        cells=c)  # lstm.apply(states=h0,cells=cells,inputs=gate_inputs)

    f = theano.function([x, h0, c], h)
    print("a")
    print(
        f(np.ones((3, 1, 3), dtype=theano.config.floatX),
          np.ones((1, 3), dtype=theano.config.floatX),
          np.ones((1, 3), dtype=theano.config.floatX)))
Example #24
    def setUp(self):
        n_iter = 2

        x_dim = 8
        z_dim = 10
        dec_dim = 12
        enc_dim = 16

        read_dim = 2 * x_dim

        rnninits = {
            #'weights_init': Orthogonal(),
            'weights_init': IsotropicGaussian(0.01),
            'biases_init': Constant(0.),
        }
        inits = {
            #'weights_init': Orthogonal(),
            'weights_init': IsotropicGaussian(0.01),
            'biases_init': Constant(0.),
        }

        reader = Reader(x_dim=x_dim, dec_dim=dec_dim, **inits)
        writer = Writer(input_dim=dec_dim, output_dim=x_dim, **inits)

        encoder_rnn = LSTM(dim=enc_dim, name="RNN_enc", **rnninits)
        decoder_rnn = LSTM(dim=dec_dim, name="RNN_dec", **rnninits)
        encoder_mlp = MLP([Identity()], [(read_dim + dec_dim), 4 * enc_dim],
                          name="MLP_enc",
                          **inits)
        decoder_mlp = MLP([Identity()], [z_dim, 4 * dec_dim],
                          name="MLP_dec",
                          **inits)
        q_sampler = Qsampler(input_dim=enc_dim, output_dim=z_dim, **inits)

        self.draw = DrawModel(n_iter, reader, encoder_mlp, encoder_rnn,
                              q_sampler, decoder_mlp, decoder_rnn, writer)
        self.draw.initialize()
Example #25
def create_rnn(hidden_dim, vocab_dim, mode="rnn"):
    # input
    x = tensor.imatrix('inchar')
    y = tensor.imatrix('outchar')

    #
    W = LookupTable(
        name="W1",
        #dim = hidden_dim*4,
        dim=hidden_dim,
        length=vocab_dim,
        weights_init=initialization.IsotropicGaussian(0.01),
        biases_init=initialization.Constant(0))
    if mode == "lstm":
        # Long Short Term Memory
        H = LSTM(hidden_dim,
                 name='H',
                 weights_init=initialization.IsotropicGaussian(0.01),
                 biases_init=initialization.Constant(0.0))
    else:
        # recurrent history weight
        H = SimpleRecurrent(
            name="H",
            dim=hidden_dim,
            activation=Tanh(),
            weights_init=initialization.IsotropicGaussian(0.01))
    #
    S = Linear(name="W2",
               input_dim=hidden_dim,
               output_dim=vocab_dim,
               weights_init=initialization.IsotropicGaussian(0.01),
               biases_init=initialization.Constant(0))

    A = NDimensionalSoftmax(name="softmax")

    initLayers([W, H, S])
    activations = W.apply(x)
    hiddens = H.apply(activations)  #[0]
    activations2 = S.apply(hiddens)
    y_hat = A.apply(activations2, extra_ndim=1)
    cost = A.categorical_cross_entropy(y, activations2, extra_ndim=1).mean()

    cg = ComputationGraph(cost)
    #print VariableFilter(roles=[WEIGHT])(cg.variables)
    #W1,H,W2 = VariableFilter(roles=[WEIGHT])(cg.variables)

    layers = (x, W, H, S, A, y)

    return cg, layers, y_hat, cost
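A minimal sketch of how the cost returned by create_rnn could be trained with standard Blocks components; the data stream is a placeholder (any Fuel stream providing 'inchar' and 'outchar' batches) and the sizes are illustrative.

from blocks.algorithms import GradientDescent, Adam
from blocks.main_loop import MainLoop
from blocks.extensions import FinishAfter, Printing
from blocks.extensions.monitoring import TrainingDataMonitoring

cg, layers, y_hat, cost = create_rnn(hidden_dim=256, vocab_dim=128, mode="lstm")
algorithm = GradientDescent(cost=cost, parameters=cg.parameters, step_rule=Adam())
main_loop = MainLoop(
    algorithm=algorithm,
    data_stream=stream,  # placeholder: a Fuel stream with 'inchar'/'outchar' sources
    extensions=[TrainingDataMonitoring([cost], after_epoch=True),
                FinishAfter(after_n_epochs=10),
                Printing()])
main_loop.run()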
Example #26
    def setUp(self):
        depth = 4
        self.depth = depth
        dim = 3  # don't change, hardwired in the code
        transitions = [LSTM(dim=dim) for _ in range(depth)]
        self.stack0 = RecurrentStack(transitions,
                                     weights_init=Constant(2),
                                     biases_init=Constant(0))
        self.stack0.initialize()

        self.stack2 = RecurrentStack(transitions,
                                     weights_init=Constant(2),
                                     biases_init=Constant(0),
                                     skip_connections=True)
        self.stack2.initialize()
Example #27
    def __init__(self, word_dim, hidden_dim):
        self.forward_lstm= LSTM(hidden_dim,
                                name='question_forward_lstm',
                                weights_init=IsotropicGaussian(0.01),
                                biases_init=Constant(0))
        self.backward_lstm= LSTM(hidden_dim,
                                 name='question_backward_lstm',
                                 weights_init=IsotropicGaussian(0.01),
                                 biases_init=Constant(0))
        self.x_to_h_forward = Linear(word_dim,
                                     hidden_dim * 4,
                                     name='word_x_to_h_forward',
                                     weights_init=IsotropicGaussian(0.01),
                                     biases_init=Constant(0))
        self.x_to_h_backward = Linear(word_dim,
                                      hidden_dim * 4,
                                      name='word_x_to_h_backward',
                                      weights_init=IsotropicGaussian(0.01),
                                      biases_init=Constant(0))

        self.forward_lstm.initialize()
        self.backward_lstm.initialize()
        self.x_to_h_forward.initialize()
        self.x_to_h_backward.initialize()
Example #28
    def __init__(self, image_feature_dim, embedding_dim, **kwargs):
        super(Encoder, self).__init__(**kwargs)

        self.image_embedding = Linear(input_dim=image_feature_dim,
                                      output_dim=embedding_dim,
                                      name="image_embedding")

        self.to_inputs = Linear(
            input_dim=embedding_dim,
            # times 4 because the input, forget, cell, and hidden transforms are stacked
            output_dim=embedding_dim * 4,
            name="to_inputs")

        self.transition = LSTM(dim=embedding_dim, name="transition")

        self.children = [self.image_embedding, self.to_inputs, self.transition]
Example #29
    def __init__(self, dims=(88, 100, 100), **kwargs):
        super(Rnn, self).__init__(**kwargs)
        self.dims = dims

        self.input_transform = Linear(
            input_dim=dims[0],
            output_dim=dims[1],
            weights_init=IsotropicGaussian(0.01),
            # biases_init=Constant(0.0),
            use_bias=False,
            name="input_transfrom")

        self.gru_layer = SimpleRecurrent(dim=dims[1],
                                         activation=Tanh(),
                                         weights_init=IsotropicGaussian(0.01),
                                         biases_init=Constant(0.0),
                                         use_bias=True,
                                         name="gru_rnn_layer")

        # TODO: find a way to automatically set the output dim in case of lstm vs normal rnn
        self.linear_trans = Linear(input_dim=dims[1],
                                   output_dim=dims[2] * 4,
                                   weights_init=IsotropicGaussian(0.01),
                                   biases_init=Constant(0.0),
                                   use_bias=False,
                                   name="h2h_transform")

        self.lstm_layer = LSTM(dim=dims[2],
                               activation=Tanh(),
                               weights_init=IsotropicGaussian(0.01),
                               biases_init=Constant(0.0),
                               use_bias=True,
                               name="lstm_rnn_layer")

        self.out_transform = MLP(activations=[Sigmoid()],
                                 dims=[dims[2], dims[0]],
                                 weights_init=IsotropicGaussian(0.01),
                                 use_bias=True,
                                 biases_init=Constant(0.0),
                                 name="out_layer")

        self.children = [
            self.input_transform, self.gru_layer, self.linear_trans,
            self.lstm_layer, self.out_transform
        ]
Example #30
    def __init__(self, dims, **kwargs):
        super(ModularRnn, self).__init__(**kwargs)

        self.layers = []
        self.tranforms = []
        for i, (dim1, dim2) in enumerate(zip(dims[:-1], dims[1:])):
            self.layers.append(
                LSTM(dim=dim1,
                     activation=Tanh(),
                     weights_init=IsotropicGaussian(0.01),
                     biases_init=Constant(0.0),
                     use_bias=True,
                     name="rnn_layer%s" % i))
            self.tranforms.append(
                Linear(input_dim=dim1,
                       output_dim=dim2 * multiplier,
                       weights_init=IsotropicGaussian(0.01),
                       biases_init=Constant(0.0),
                       use_bias=False,
                       name="linea_transform%s" % i))