Example #1
    def create_model(self):
        input_dim = self.input_dim
        x = self.x
        x_to_h = Linear(input_dim,
                        input_dim * 4,
                        name='x_to_h',
                        weights_init=IsotropicGaussian(),
                        biases_init=Constant(0.0))
        lstm = LSTM(input_dim,
                    name='lstm',
                    weights_init=IsotropicGaussian(),
                    biases_init=Constant(0.0))
        h_to_o = Linear(input_dim,
                        1,
                        name='h_to_o',
                        weights_init=IsotropicGaussian(),
                        biases_init=Constant(0.0))

        x_transform = x_to_h.apply(x)
        self.x_to_h = x_to_h
        self.lstm = lstm
        self.h_to_o = h_to_o

        h, c = lstm.apply(x_transform)

        # only values of hidden units of the last timeframe are used for
        # the classification
        probs = h_to_o.apply(h[-1])
        return probs
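Note: throughout these examples the Linear brick feeding the LSTM outputs 4 * dim values, because Blocks' LSTM brick expects the input, forget, cell and output gate pre-activations stacked along the feature axis. A minimal, self-contained sketch of that Linear -> LSTM pattern (assuming Blocks and Theano are installed; the dimensions and names are illustrative, not taken from the example above):

import numpy
import theano
from theano import tensor
from blocks.bricks import Linear
from blocks.bricks.recurrent import LSTM
from blocks.initialization import IsotropicGaussian, Constant

input_dim, lstm_dim = 3, 5
x = tensor.tensor3('x')  # (time, batch, features)

x_to_h = Linear(input_dim, lstm_dim * 4, name='x_to_h',
                weights_init=IsotropicGaussian(0.01), biases_init=Constant(0.0))
lstm = LSTM(lstm_dim, name='lstm',
            weights_init=IsotropicGaussian(0.01), biases_init=Constant(0.0))
x_to_h.initialize()
lstm.initialize()

h, c = lstm.apply(x_to_h.apply(x))  # h, c: (time, batch, lstm_dim)
f = theano.function([x], h[-1])     # hidden state at the last time step
print(f(numpy.ones((7, 2, input_dim), dtype=theano.config.floatX)).shape)  # (2, 5)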
Example #2
    def __init__(self, feature_dim, memory_dim, fc1_dim, fc2_dim):
        self.W = Linear(input_dim=feature_dim,
                        output_dim=memory_dim * 4,
                        weights_init=IsotropicGaussian(0.01),
                        biases_init=Constant(0),
                        use_bias=False,
                        name='seqDecoder_W')
        self.GRU_A = LSTM(feature_dim,
                          name='seqDecoder_A',
                          weights_init=IsotropicGaussian(0.01),
                          biases_init=Constant(0))
        self.GRU_B = LSTM(memory_dim,
                          name='seqDecoder_B',
                          weights_init=IsotropicGaussian(0.01),
                          biases_init=Constant(0))
        self.W.initialize()
        self.GRU_A.initialize()
        self.GRU_B.initialize()
        self.fc1 = Linear(input_dim=memory_dim,
                          output_dim=fc1_dim,
                          weights_init=IsotropicGaussian(0.01),
                          biases_init=Constant(0),
                          name='fc1')
        self.fc2 = Linear(input_dim=fc1_dim,
                          output_dim=fc2_dim,
                          weights_init=IsotropicGaussian(0.01),
                          biases_init=Constant(0),
                          name='fc2')

        self.fc1.initialize()
        self.fc2.initialize()
Example #3
    def __init__(self, emb_dim, dim, num_input_words, 
                 num_output_words, vocab, 
                 **kwargs):
        if emb_dim == 0:
            emb_dim = dim
        if num_input_words == 0:
            num_input_words = vocab.size()
        if num_output_words == 0:
            num_output_words = vocab.size()

        self._num_input_words = num_input_words
        self._num_output_words = num_output_words
        self._vocab = vocab

        self._word_to_id = WordToIdOp(self._vocab)

        children = []

        self._main_lookup = LookupTable(self._num_input_words, emb_dim, name='main_lookup')
        self._encoder_fork = Linear(emb_dim, 4 * dim, name='encoder_fork')
        self._encoder_rnn = LSTM(dim, name='encoder_rnn')
        self._decoder_fork = Linear(emb_dim, 4 * dim, name='decoder_fork')
        self._decoder_rnn = LSTM(dim, name='decoder_rnn')
        children.extend([self._main_lookup,
                         self._encoder_fork, self._encoder_rnn,
                         self._decoder_fork, self._decoder_rnn])
        self._pre_softmax = Linear(dim, self._num_output_words)
        self._softmax = NDimensionalSoftmax()
        children.extend([self._pre_softmax, self._softmax])

        super(LanguageModel, self).__init__(children=children, **kwargs)
Example #4
    def __init__(self, input_size, hidden_size, output_size):
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size

        x = tensor.tensor3('x', dtype=floatX)
        y = tensor.tensor3('y', dtype=floatX)

        x_to_lstm = Linear(name="x_to_lstm", input_dim=input_size, output_dim=4 * hidden_size,
                           weights_init=IsotropicGaussian(), biases_init=Constant(0))
        lstm = LSTM(dim=hidden_size, name="lstm", weights_init=IsotropicGaussian(), biases_init=Constant(0))
        lstm_to_output = Linear(name="lstm_to_output", input_dim=hidden_size, output_dim=output_size,
                                weights_init=IsotropicGaussian(), biases_init=Constant(0))

        x_transform = x_to_lstm.apply(x)
        h, c = lstm.apply(x_transform)

        y_hat = lstm_to_output.apply(h)
        y_hat = Logistic(name="y_hat").apply(y_hat)

        self.cost = BinaryCrossEntropy(name="cost").apply(y, y_hat)

        x_to_lstm.initialize()
        lstm.initialize()
        lstm_to_output.initialize()

        self.computation_graph = ComputationGraph(self.cost)
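A hedged follow-up: the ComputationGraph built above is typically turned into a training algorithm the same way Example #13 further down does it. The Adam step rule here is an assumption for illustration, not part of this example:

from blocks.algorithms import GradientDescent, Adam

# `model` is assumed to be an instance of the class defined above.
algorithm = GradientDescent(cost=model.cost,
                            parameters=model.computation_graph.parameters,
                            step_rule=Adam())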
Example #5
    def apply(self, input_, target):
        x_to_h = Linear(name='x_to_h',
                        input_dim=self.dims[0],
                        output_dim=self.dims[1] * 4)
        pre_rnn = x_to_h.apply(input_)
        pre_rnn.name = 'pre_rnn'
        rnn = LSTM(activation=Tanh(), dim=self.dims[1], name=self.name)
        h, _ = rnn.apply(pre_rnn)
        h.name = 'h'
        h_to_y = Linear(name='h_to_y',
                        input_dim=self.dims[1],
                        output_dim=self.dims[2])
        y_hat = h_to_y.apply(h)
        y_hat.name = 'y_hat'

        cost = SquaredError().apply(target, y_hat)
        cost.name = 'MSE'

        self.outputs = {}
        self.outputs['y_hat'] = y_hat
        self.outputs['cost'] = cost
        self.outputs['pre_rnn'] = pre_rnn
        self.outputs['h'] = h

        # Initialization
        for brick in (rnn, x_to_h, h_to_y):
            brick.weights_init = IsotropicGaussian(0.01)
            brick.biases_init = Constant(0)
            brick.initialize()
Example #6
    def __init__(self,
                 input_dim,
                 output_dim,
                 lstm_dim,
                 print_intermediate=False,
                 print_attrs=['__str__'],
                 **kwargs):
        super(LinearLSTM, self).__init__(**kwargs)

        self.x_to_h = Linear(input_dim,
                             lstm_dim * 4,
                             name='x_to_h',
                             weights_init=IsotropicGaussian(),
                             biases_init=Constant(0.0))
        self.lstm = LSTM(lstm_dim,
                         name='lstm',
                         weights_init=IsotropicGaussian(),
                         biases_init=Constant(0.0))
        self.h_to_o = Linear(lstm_dim,
                             output_dim,
                             name='h_to_o',
                             weights_init=IsotropicGaussian(),
                             biases_init=Constant(0.0))

        self.children = [self.x_to_h, self.lstm, self.h_to_o]

        self.print_intermediate = print_intermediate
        self.print_attrs = print_attrs
Example #7
def make_bidir_lstm_stack(seq, seq_dim, mask, sizes, skip=True, name=''):
    bricks = []

    curr_dim = [seq_dim]
    curr_hidden = [seq]

    hidden_list = []
    for k, dim in enumerate(sizes):
        fwd_lstm_ins = [Linear(input_dim=d, output_dim=4*dim, name='%s_fwd_lstm_in_%d_%d'%(name,k,l)) for l, d in enumerate(curr_dim)]
        fwd_lstm = LSTM(dim=dim, activation=Tanh(), name='%s_fwd_lstm_%d'%(name,k))

        bwd_lstm_ins = [Linear(input_dim=d, output_dim=4*dim, name='%s_bwd_lstm_in_%d_%d'%(name,k,l)) for l, d in enumerate(curr_dim)]
        bwd_lstm = LSTM(dim=dim, activation=Tanh(), name='%s_bwd_lstm_%d'%(name,k))

        bricks = bricks + [fwd_lstm, bwd_lstm] + fwd_lstm_ins + bwd_lstm_ins

        fwd_tmp = sum(x.apply(v) for x, v in zip(fwd_lstm_ins, curr_hidden))
        bwd_tmp = sum(x.apply(v) for x, v in zip(bwd_lstm_ins, curr_hidden))
        fwd_hidden, _ = fwd_lstm.apply(fwd_tmp, mask=mask)
        bwd_hidden, _ = bwd_lstm.apply(bwd_tmp[::-1], mask=mask[::-1])
        hidden_list = hidden_list + [fwd_hidden, bwd_hidden]
        if skip:
            curr_hidden = [seq, fwd_hidden, bwd_hidden[::-1]]
            curr_dim = [seq_dim, dim, dim]
        else:
            curr_hidden = [fwd_hidden, bwd_hidden[::-1]]
            curr_dim = [dim, dim]

    return bricks, hidden_list
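A hedged usage sketch for the helper above (the dimensions, names and initialization scheme are assumptions; the returned bricks still have to be initialized by the caller):

from theano import tensor
from blocks.initialization import IsotropicGaussian, Constant

seq = tensor.tensor3('seq')       # (time, batch, seq_dim)
seq_mask = tensor.matrix('mask')  # (time, batch)

bricks, hiddens = make_bidir_lstm_stack(seq, seq_dim=50, mask=seq_mask,
                                        sizes=[100, 100], skip=True, name='ctx')
for brick in bricks:
    brick.weights_init = IsotropicGaussian(0.01)
    brick.biases_init = Constant(0.0)
    brick.initialize()
# hiddens holds [fwd_1, bwd_1, fwd_2, bwd_2]: one (time, batch, dim) state
# sequence per direction and per layer.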
Example #8
    def __init__(self, config, **kwargs):
        super(Model, self).__init__(**kwargs)
        self.config = config

        self.pre_context_embedder = ContextEmbedder(
            config.pre_embedder, name='pre_context_embedder')
        self.post_context_embedder = ContextEmbedder(
            config.post_embedder, name='post_context_embedder')

        in1 = 2 + sum(x[2] for x in config.pre_embedder.dim_embeddings)
        self.input_to_rec = MLP(activations=[Tanh()],
                                dims=[in1, config.hidden_state_dim],
                                name='input_to_rec')

        self.rec = LSTM(dim=config.hidden_state_dim, name='recurrent')

        in2 = config.hidden_state_dim + sum(
            x[2] for x in config.post_embedder.dim_embeddings)
        self.rec_to_output = MLP(activations=[Tanh()],
                                 dims=[in2, 2],
                                 name='rec_to_output')

        self.sequences = ['latitude', 'latitude_mask', 'longitude']
        self.context = self.pre_context_embedder.inputs + self.post_context_embedder.inputs
        self.inputs = self.sequences + self.context
        self.children = [
            self.pre_context_embedder, self.post_context_embedder,
            self.input_to_rec, self.rec, self.rec_to_output
        ]

        self.initial_state_ = shared_floatx_zeros((config.hidden_state_dim, ),
                                                  name="initial_state")
        self.initial_cells = shared_floatx_zeros((config.hidden_state_dim, ),
                                                 name="initial_cells")
Example #9
    def __init__(self, dim, mini_dim, summary_dim, **kwargs):
        super(LSTMwMini, self).__init__(**kwargs)
        self.dim = dim
        self.mini_dim = mini_dim
        self.summary_dim = summary_dim

        self.recurrent_layer = LSTM(dim=self.summary_dim,
                                    activation=Rectifier(),
                                    name='recurrent_layer',
                                    weights_init=IsotropicGaussian(),
                                    biases_init=Constant(0.0))
        self.mini_recurrent_layer = LSTM(dim=self.mini_dim,
                                         activation=Rectifier(),
                                         name='mini_recurrent_layer',
                                         weights_init=IsotropicGaussian(),
                                         biases_init=Constant(0.0))

        self.mini_to_main = Linear(self.dim + self.mini_dim,
                                   self.summary_dim,
                                   name='mini_to_main',
                                   weights_init=IsotropicGaussian(),
                                   biases_init=Constant(0.0))
        self.mini_to_main2 = Linear(self.summary_dim,
                                    self.summary_dim * 4,
                                    name='mini_to_main2',
                                    weights_init=IsotropicGaussian(),
                                    biases_init=Constant(0.0))

        self.children = [
            self.recurrent_layer, self.mini_recurrent_layer, self.mini_to_main,
            self.mini_to_main2
        ]
Example #10
def lstm_layer(in_dim, h, h_dim, n, pref=""):
    linear = Linear(input_dim=in_dim,
                    output_dim=h_dim * 4,
                    name='linear' + str(n) + pref)
    lstm = LSTM(dim=h_dim, name='lstm' + str(n) + pref)
    initialize([linear, lstm])
    return lstm.apply(linear.apply(h))[0]
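The `initialize` helper used above is project-specific and not shown in this example; a plausible minimal version (an assumption, mirroring the per-brick initialization loops in Examples #5 and #16) would be:

from blocks.initialization import IsotropicGaussian, Constant

def initialize(bricks, w_std=0.01):
    # Hypothetical helper: Gaussian weights, zero biases, then initialize each brick.
    for brick in bricks:
        brick.weights_init = IsotropicGaussian(w_std)
        brick.biases_init = Constant(0.0)
        brick.initialize()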
Example #11
    def apply(self, input_, target):
        x_to_h = Linear(name='x_to_h',
                        input_dim=self.dims[0],
                        output_dim=self.dims[1] * 4)
        pre_rnn = x_to_h.apply(input_)
        pre_rnn.name = 'pre_rnn'
        rnn = LSTM(activation=Tanh(),
                   dim=self.dims[1], name=self.name)
        h, _ = rnn.apply(pre_rnn)
        h.name = 'h'
        h_to_y = Linear(name='h_to_y',
                        input_dim=self.dims[1],
                        output_dim=self.dims[2])
        y_hat = h_to_y.apply(h)
        y_hat.name = 'y_hat'

        cost = SquaredError().apply(target, y_hat)
        cost.name = 'MSE'

        self.outputs = {}
        self.outputs['y_hat'] = y_hat
        self.outputs['cost'] = cost
        self.outputs['pre_rnn'] = pre_rnn
        self.outputs['h'] = h

        # Initialization
        for brick in (rnn, x_to_h, h_to_y):
            brick.weights_init = IsotropicGaussian(0.01)
            brick.biases_init = Constant(0)
            brick.initialize()
Example #12
    def __init__(self,
                 num_input_words,
                 emb_dim,
                 dim,
                 vocab,
                 lookup=None,
                 fork_and_rnn=None,
                 **kwargs):

        if num_input_words > 0:
            logger.info("Restricting def vocab to " + str(num_input_words))
            self._num_input_words = num_input_words
        else:
            self._num_input_words = vocab.size()

        self._vocab = vocab

        children = []

        if lookup is None:
            self._def_lookup = LookupTable(self._num_input_words,
                                           emb_dim,
                                           name='def_lookup')
        else:
            self._def_lookup = lookup

        if fork_and_rnn is None:
            self._def_fork = Linear(emb_dim, 4 * dim, name='def_fork')
            self._def_rnn = LSTM(dim, name='def_rnn')
        else:
            self._def_fork, self._def_rnn = fork_and_rnn

        children.extend([self._def_lookup, self._def_fork, self._def_rnn])

        super(LSTMReadDefinitions, self).__init__(children=children, **kwargs)
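A hedged sketch of what the `lookup` and `fork_and_rnn` arguments are for: they let several readers share the same embedding and encoder bricks, and hence the same parameters. The dimensions below are illustrative, and `vocab` stands for whatever vocabulary object the surrounding project uses:

shared_lookup = LookupTable(10000, 300, name='def_lookup')
shared_fork = Linear(300, 4 * 512, name='def_fork')
shared_rnn = LSTM(512, name='def_rnn')

reader_a = LSTMReadDefinitions(num_input_words=10000, emb_dim=300, dim=512,
                               vocab=vocab, lookup=shared_lookup,
                               fork_and_rnn=(shared_fork, shared_rnn))
reader_b = LSTMReadDefinitions(num_input_words=10000, emb_dim=300, dim=512,
                               vocab=vocab, lookup=shared_lookup,
                               fork_and_rnn=(shared_fork, shared_rnn))
# Both readers now hold the same child bricks, so their parameters are shared.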
Example #13
def main(max_seq_length, lstm_dim, batch_size, num_batches, num_epochs):
    dataset_train = IterableDataset(generate_data(max_seq_length, batch_size,
                                                  num_batches))
    dataset_test = IterableDataset(generate_data(max_seq_length, batch_size,
                                                 100))

    stream_train = DataStream(dataset=dataset_train)
    stream_test = DataStream(dataset=dataset_test)

    x = T.tensor3('x')
    y = T.matrix('y')

    # we need to provide data for the LSTM layer of size 4 * lstm_dim, see
    # LSTM layer documentation for the explanation
    x_to_h = Linear(1, lstm_dim * 4, name='x_to_h',
                    weights_init=IsotropicGaussian(),
                    biases_init=Constant(0.0))
    lstm = LSTM(lstm_dim, name='lstm',
                weights_init=IsotropicGaussian(),
                biases_init=Constant(0.0))
    h_to_o = Linear(lstm_dim, 1, name='h_to_o',
                    weights_init=IsotropicGaussian(),
                    biases_init=Constant(0.0))

    x_transform = x_to_h.apply(x)
    h, c = lstm.apply(x_transform)

    # only values of hidden units of the last timeframe are used for
    # the classification
    y_hat = h_to_o.apply(h[-1])
    y_hat = Logistic().apply(y_hat)

    cost = BinaryCrossEntropy().apply(y, y_hat)
    cost.name = 'cost'

    lstm.initialize()
    x_to_h.initialize()
    h_to_o.initialize()

    cg = ComputationGraph(cost)

    algorithm = GradientDescent(cost=cost, parameters=cg.parameters,
                                step_rule=Adam())
    test_monitor = DataStreamMonitoring(variables=[cost],
                                        data_stream=stream_test, prefix="test")
    train_monitor = TrainingDataMonitoring(variables=[cost], prefix="train",
                                           after_epoch=True)

    main_loop = MainLoop(algorithm, stream_train,
                         extensions=[test_monitor, train_monitor,
                                     FinishAfter(after_n_epochs=num_epochs),
                                     Printing(), ProgressBar()])
    main_loop.run()

    print('Learned weights:')
    for layer in (x_to_h, lstm, h_to_o):
        print("Layer '%s':" % layer.name)
        for param in layer.parameters:
            print(param.name, ':', param.get_value())
        print()
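`generate_data` is not shown in this example; a plausible stand-in (purely an assumption about the data format, chosen so the shapes match the `x = T.tensor3` and `y = T.matrix` variables above) could be:

import numpy
from collections import OrderedDict

def generate_data(max_seq_length, batch_size, num_batches):
    # Hypothetical toy task: label a sequence 1 if its mean exceeds 0.5.
    x_batches, y_batches = [], []
    for _ in range(num_batches):
        x = numpy.random.rand(max_seq_length, batch_size, 1).astype('float32')
        y = (x.mean(axis=0) > 0.5).astype('float32')
        x_batches.append(x)
        y_batches.append(y)
    return OrderedDict([('x', x_batches), ('y', y_batches)])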
Example #14
    def __init__(self, feature_dim, hidden_dim, output_dim):
        self.image_embed = Linear(input_dim=feature_dim,
                                  output_dim=hidden_dim,
                                  weights_init=IsotropicGaussian(0.01),
                                  biases_init=Constant(0),
                                  use_bias=False,
                                  name='image_embed')
        self.word_embed = Linear(input_dim=feature_dim,
                                 output_dim=hidden_dim,
                                 weights_init=IsotropicGaussian(0.01),
                                 biases_init=Constant(0),
                                 use_bias=False,
                                 name='word_embed')
        self.r_embed = Linear(input_dim=feature_dim,
                              output_dim=hidden_dim,
                              weights_init=IsotropicGaussian(0.01),
                              biases_init=Constant(0),
                              use_bias=False,
                              name='r_embed')
        self.m_to_s = Linear(input_dim=hidden_dim,
                             output_dim=1,
                             weights_init=IsotropicGaussian(0.01),
                             biases_init=Constant(0),
                             use_bias=False,
                             name='m_to_s')
        self.attention_dist = Softmax(name='attention_dist_softmax')
        self.r_to_r = Linear(input_dim=feature_dim,
                             output_dim=feature_dim,
                             weights_init=IsotropicGaussian(0.01),
                             biases_init=Constant(0),
                             use_bias=False,
                             name='r_to_r')
        # self.r_to_g = Linear(input_dim=feature_dim,
        #                      output_dim=output_dim,
        #                      weights_init=IsotropicGaussian(0.01),
        #                      biases_init=Constant(0),
        #                      use_bias=False,
        #                      name='r_to_g')
        self.image_embed.initialize()
        self.word_embed.initialize()
        self.r_embed.initialize()
        self.m_to_s.initialize()
        self.r_to_r.initialize()
        # self.r_to_g.initialize()

        # the sequence to sequence LSTM
        self.seq = LSTM(output_dim,
                        name='rewatcher_seq',
                        weights_init=IsotropicGaussian(0.01),
                        biases_init=Constant(0))
        self.seq_embed = Linear(feature_dim,
                                output_dim * 4,
                                name='rewatcher_seq_embed',
                                weights_init=IsotropicGaussian(0.01),
                                biases_init=Constant(0),
                                use_bias=False)

        self.seq.initialize()
        self.seq_embed.initialize()
Example #15
def lstm_layer(in_size, dim, x, h, n, task, first_layer=False):
    if connect_h_to_h == 'all-previous':
        if first_layer:
            lstm_input = x
            linear = Linear(input_dim=in_size,
                            output_dim=dim * 4,
                            name='linear' + str(n) + '-' + str(task))
        elif connect_x_to_h:
            lstm_input = T.concatenate([x] + [hidden for hidden in h], axis=2)
            linear = Linear(input_dim=in_size + dim * (n),
                            output_dim=dim * 4,
                            name='linear' + str(n) + '-' + str(task))
        else:
            lstm_input = T.concatenate([hidden for hidden in h], axis=2)
            linear = Linear(input_dim=dim * (n + 1),
                            output_dim=dim * 4,
                            name='linear' + str(n) + '-' + str(task))
    elif connect_h_to_h == 'two-previous':
        if first_layer:
            lstm_input = x
            linear = Linear(input_dim=in_size,
                            output_dim=dim * 4,
                            name='linear' + str(n) + '-' + str(task))
        elif connect_x_to_h:
            lstm_input = T.concatenate([x] + h[max(0, n - 2):n], axis=2)
            linear = Linear(input_dim=in_size + dim * 2 if n > 1 else in_size +
                            dim,
                            output_dim=dim * 4,
                            name='linear' + str(n) + '-' + str(task))
        else:
            lstm_input = T.concatenate(h[max(0, n - 2):n], axis=2)
            linear = Linear(input_dim=dim * 2 if n > 1 else dim,
                            output_dim=dim * 4,
                            name='linear' + str(n) + '-' + str(task))
    elif connect_h_to_h == 'one-previous':
        if first_layer:
            lstm_input = x
            linear = Linear(input_dim=in_size,
                            output_dim=dim * 4,
                            name='linear' + str(n) + '-' + str(task))
        elif connect_x_to_h:
            lstm_input = T.concatenate([x] + [h[n - 1]], axis=2)
            linear = Linear(input_dim=in_size + dim,
                            output_dim=dim * 4,
                            name='linear' + str(n) + '-' + str(task))
        else:
            lstm_input = h[n - 1]
            linear = Linear(input_dim=dim,
                            output_dim=dim * 4,
                            name='linear' + str(n) + '-' + str(task))
    lstm = LSTM(dim=dim, name=layer_models[n] + str(n) + '-' + str(task))
    initialize([linear, lstm])
    if layer_models[n] == 'lstm':
        return lstm.apply(linear.apply(lstm_input))
    elif layer_models[n] == 'mt_lstm':
        return lstm.apply(linear.apply(lstm_input),
                          time_scale=layer_resolutions[n],
                          time_offset=layer_execution_time_offset[n])
Example #16
def construct_model(activation_function, r_dim, hidden_dim, out_dim):
    # Construct the model
    r = tensor.fmatrix('r')
    x = tensor.fmatrix('x')
    y = tensor.ivector('y')

    nx = x.shape[0]
    nj = x.shape[1]  # also is r.shape[0]
    nr = r.shape[1]

    # r is nj x nr
    # x is nx x nj
    # y is nx

    # Get a representation of r of size r_dim
    r = DAE(r)

    # r is now nj x r_dim

    # r_rep is nx x nj x r_dim
    r_rep = r[None, :, :].repeat(axis=0, repeats=nx)
    # x3 is nx x nj x 1
    x3 = x[:, :, None]

    # concat is nx x nj x (r_dim + 1)
    concat = tensor.concatenate([r_rep, x3], axis=2)

    # Change concat from Batch x Time x Features to T X B x F
    rnn_input = concat.dimshuffle(1, 0, 2)

    linear = Linear(input_dim=r_dim + 1,
                    output_dim=4 * hidden_dim,
                    name="input_linear")
    lstm = LSTM(dim=hidden_dim,
                activation=activation_function,
                name="hidden_recurrent")
    top_linear = Linear(input_dim=hidden_dim,
                        output_dim=out_dim,
                        name="out_linear")

    pre_rnn = linear.apply(rnn_input)
    states = lstm.apply(pre_rnn)[0]
    activations = top_linear.apply(states)
    activations = tensor.mean(activations, axis=0)

    cost = Softmax().categorical_cross_entropy(y, activations)

    pred = activations.argmax(axis=1)
    error_rate = tensor.neq(y, pred).mean()

    # Initialize parameters

    for brick in (linear, lstm, top_linear):
        brick.weights_init = IsotropicGaussian(0.1)
        brick.biases_init = Constant(0.)
        brick.initialize()

    return cost, error_rate
Example #17
def create_rnn(hidden_dim, vocab_dim,mode="rnn"):
    # input
    x = tensor.imatrix('inchar')
    y = tensor.imatrix('outchar')

    # 
    W = LookupTable(
        name = "W1",
        #dim = hidden_dim*4,
        dim = hidden_dim,
        length = vocab_dim,
        weights_init = initialization.IsotropicGaussian(0.01),
        biases_init = initialization.Constant(0)
    )
    if mode == "lstm":
        # Long Short Term Memory
        H = LSTM(
            hidden_dim, 
            name = 'H',
            weights_init = initialization.IsotropicGaussian(0.01),
            biases_init = initialization.Constant(0.0)
        )
    else:
        # recurrent history weight
        H = SimpleRecurrent(
            name = "H",
            dim = hidden_dim,
            activation = Tanh(),
            weights_init = initialization.IsotropicGaussian(0.01)
        )
    # 
    S = Linear(
        name = "W2",
        input_dim = hidden_dim,
        output_dim = vocab_dim,
        weights_init = initialization.IsotropicGaussian(0.01),
        biases_init = initialization.Constant(0)
    )

    A = NDimensionalSoftmax(
        name = "softmax"
    )

    initLayers([W,H,S])
    activations = W.apply(x)
    hiddens = H.apply(activations)#[0]
    activations2 = S.apply(hiddens)
    y_hat = A.apply(activations2, extra_ndim=1)
    cost = A.categorical_cross_entropy(y, activations2, extra_ndim=1).mean()

    cg = ComputationGraph(cost)
    #print VariableFilter(roles=[WEIGHT])(cg.variables)
    #W1,H,W2 = VariableFilter(roles=[WEIGHT])(cg.variables)

    layers = (x, W, H, S, A, y)

    return  cg, layers, y_hat, cost
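`initLayers` is also project-specific and not shown; a minimal plausible version (an assumption) would simply trigger parameter initialization on each brick, since the init schemes are already set in the constructors above:

def initLayers(layers):
    # Hypothetical helper: each brick already carries its init schemes,
    # so it only remains to call initialize() on it.
    for layer in layers:
        layer.initialize()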
Example #18
def add_lstm(input_dim, input_var):
    linear = Linear(input_dim=input_dim,output_dim=input_dim*4,name="linear_layer")
    lstm = LSTM(dim=input_dim, name="lstm_layer")

    testing_init(linear)
    #linear.initialize()
    default_init(lstm)

    h = linear.apply(input_var)
    return lstm.apply(h)
Example #19
def construct_model(activation_function, r_dim, hidden_dim, out_dim):
    # Construct the model
    r = tensor.fmatrix('r')
    x = tensor.fmatrix('x')
    y = tensor.ivector('y')

    nx = x.shape[0]
    nj = x.shape[1]  # also is r.shape[0]
    nr = r.shape[1]

    # r is nj x nr
    # x is nx x nj
    # y is nx

    # Get a representation of r of size r_dim
    r = DAE(r)

    # r is now nj x r_dim

    # r_rep is nx x nj x r_dim
    r_rep = r[None, :, :].repeat(axis=0, repeats=nx)
    # x3 is nx x nj x 1
    x3 = x[:, :, None]

    # concat is nx x nj x (r_dim + 1)
    concat = tensor.concatenate([r_rep, x3], axis=2)

    # Change concat from Batch x Time x Features to T X B x F
    rnn_input = concat.dimshuffle(1, 0, 2)

    linear = Linear(input_dim=r_dim + 1, output_dim=4 * hidden_dim,
                    name="input_linear")
    lstm = LSTM(dim=hidden_dim, activation=activation_function,
                name="hidden_recurrent")
    top_linear = Linear(input_dim=hidden_dim, output_dim=out_dim,
                        name="out_linear")

    pre_rnn = linear.apply(rnn_input)
    states = lstm.apply(pre_rnn)[0]
    activations = top_linear.apply(states)
    activations = tensor.mean(activations, axis=0)

    cost = Softmax().categorical_cross_entropy(y, activations)

    pred = activations.argmax(axis=1)
    error_rate = tensor.neq(y, pred).mean()

    # Initialize parameters

    for brick in (linear, lstm, top_linear):
        brick.weights_init = IsotropicGaussian(0.1)
        brick.biases_init = Constant(0.)
        brick.initialize()

    return cost, error_rate
Example #20
    def create_model(self):
        input_dim = self.input_dim
        x = self.x
        y = self.y
        p = self.p
        mask = self.mask
        hidden_dim = self.hidden_dim
        embedding_dim = self.embedding_dim
        lookup = LookupTable(self.dict_size,
                             embedding_dim,
                             weights_init=IsotropicGaussian(0.001),
                             name='LookupTable')
        x_to_h = Linear(embedding_dim,
                        hidden_dim * 4,
                        name='x_to_h',
                        weights_init=IsotropicGaussian(0.001),
                        biases_init=Constant(0.0))
        lstm = LSTM(hidden_dim,
                    name='lstm',
                    weights_init=IsotropicGaussian(0.001),
                    biases_init=Constant(0.0))
        h_to_o = MLP([Logistic()], [hidden_dim, 1],
                     weights_init=IsotropicGaussian(0.001),
                     biases_init=Constant(0),
                     name='h_to_o')

        lookup.initialize()
        x_to_h.initialize()
        lstm.initialize()
        h_to_o.initialize()

        embed = lookup.apply(x).reshape(
            (x.shape[0], x.shape[1], self.embedding_dim))
        embed.name = "embed_vec"
        x_transform = x_to_h.apply(embed.transpose(1, 0, 2))
        x_transform.name = "Transformed X"
        self.lookup = lookup
        self.x_to_h = x_to_h
        self.lstm = lstm
        self.h_to_o = h_to_o

        #if mask is None:
        h, c = lstm.apply(x_transform)
        #else:
        #h, c = lstm.apply(x_transform, mask=mask)
        h.name = "hidden_state"
        c.name = "cell state"
        # only values of hidden units of the last timeframe are used for
        # the classification
        indices = T.sum(mask, axis=0) - 1
        rel_hid = h[indices, T.arange(h.shape[1])]
        out = self.h_to_o.apply(rel_hid)

        probs = out
        return probs
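The last few lines above pick, for every sequence in the batch, the hidden state at that sequence's true final time step. A small NumPy illustration of the same indexing trick (not part of the original code):

import numpy

time, batch, dim = 5, 3, 4
h = numpy.arange(time * batch * dim).reshape(time, batch, dim)
mask = numpy.array([[1, 1, 1],
                    [1, 1, 1],
                    [1, 1, 0],
                    [1, 0, 0],
                    [0, 0, 0]])            # sequence lengths 4, 3, 2

indices = mask.sum(axis=0) - 1             # [3, 2, 1]
rel_hid = h[indices, numpy.arange(batch)]  # one dim-sized vector per sequence
print(rel_hid.shape)                       # (3, 4)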
Example #21
def lstm_layer(dim, h, n, x_mask, first, **kwargs):
    linear = Linear(input_dim=dim, output_dim=dim * 4, name='linear' + str(n))
    lstm = LSTM(dim=dim, activation=Rectifier(), name='lstm' + str(n))
    initialize([linear, lstm])
    applyLin = linear.apply(h)

    if first:
        lstmApply = lstm.apply(applyLin, mask=x_mask, **kwargs)[0]
    else:
        lstmApply = lstm.apply(applyLin, **kwargs)[0]
    return lstmApply
Example #22
    def lstm_layer(self, h, n):
        """
        Performs the LSTM update for a batch of word sequences
        :param h The word embeddings for this update
        :param n The number of layers of the LSTM
        """
        # Maps the word embedding to a dimensionality to be used in the LSTM
        linear = Linear(input_dim=self.hidden_size, output_dim=self.hidden_size * 4, name='linear_lstm' + str(n))
        initialize(linear, sqrt(6.0 / (5 * self.hidden_size)))
        lstm = LSTM(dim=self.hidden_size, name='lstm' + str(n))
        initialize(lstm, 0.08)
        return lstm.apply(linear.apply(h))
Example #23
def create_rnn(hidden_dim, vocab_dim, mode="rnn"):
    # input
    x = tensor.imatrix('inchar')
    y = tensor.imatrix('outchar')

    #
    W = LookupTable(
        name="W1",
        #dim = hidden_dim*4,
        dim=hidden_dim,
        length=vocab_dim,
        weights_init=initialization.IsotropicGaussian(0.01),
        biases_init=initialization.Constant(0))
    if mode == "lstm":
        # Long Short Term Memory
        H = LSTM(hidden_dim,
                 name='H',
                 weights_init=initialization.IsotropicGaussian(0.01),
                 biases_init=initialization.Constant(0.0))
    else:
        # recurrent history weight
        H = SimpleRecurrent(
            name="H",
            dim=hidden_dim,
            activation=Tanh(),
            weights_init=initialization.IsotropicGaussian(0.01))
    #
    S = Linear(name="W2",
               input_dim=hidden_dim,
               output_dim=vocab_dim,
               weights_init=initialization.IsotropicGaussian(0.01),
               biases_init=initialization.Constant(0))

    A = NDimensionalSoftmax(name="softmax")

    initLayers([W, H, S])
    activations = W.apply(x)
    hiddens = H.apply(activations)  #[0]
    activations2 = S.apply(hiddens)
    y_hat = A.apply(activations2, extra_ndim=1)
    cost = A.categorical_cross_entropy(y, activations2, extra_ndim=1).mean()

    cg = ComputationGraph(cost)
    #print VariableFilter(roles=[WEIGHT])(cg.variables)
    #W1,H,W2 = VariableFilter(roles=[WEIGHT])(cg.variables)

    layers = (x, W, H, S, A, y)

    return cg, layers, y_hat, cost
Example #24
class CoreNetwork(BaseRecurrent, Initializable):
    def __init__(self, input_dim, dim, **kwargs):
        super(CoreNetwork, self).__init__(**kwargs)
        self.input_dim = input_dim
        self.dim = dim
        self.lstm = LSTM(dim=dim, name=self.name + '_lstm',
                         weights_init=self.weights_init,
                         biases_init=self.biases_init)

        self.proj = Linear(input_dim=input_dim, output_dim=dim*4,
                           name=self.name + '_proj',
                           weights_init=self.weights_init,
                           biases_init=self.biases_init)
        self.children = [self.lstm, self.proj]

    def get_dim(self, name):
        if name == 'inputs':
            return self.input_dim
        elif name in ['state', 'cell']:
            return self.dim
        else:
            raise ValueError

    @recurrent(sequences=['inputs'], states=['state', 'cell'], contexts=[],
               outputs=['state', 'cell'])
    def apply(self, inputs, state, cell):
        state, cell = self.lstm.apply(self.proj.apply(inputs), state, cell,
                                      iterate=False)
        return state, cell
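A hedged usage sketch for the brick above (dimensions are illustrative; because `apply` is wrapped with `@recurrent`, calling it on a 3-D input iterates over the time axis):

import numpy
import theano
from theano import tensor
from blocks.initialization import IsotropicGaussian, Constant

inputs = tensor.tensor3('inputs')   # (time, batch, input_dim)
core = CoreNetwork(input_dim=6, dim=5,
                   weights_init=IsotropicGaussian(0.01), biases_init=Constant(0.0))
core.initialize()

state, cell = core.apply(inputs=inputs)
f = theano.function([inputs], state)
print(f(numpy.ones((7, 2, 6), dtype=theano.config.floatX)).shape)  # (7, 2, 5)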
Example #25
    def __init__(self, batch_size, num_subwords, num_words, subword_embedding_size, input_vocab_size,
                 subword_RNN_hidden_state_size, table_width=0.08, init_type='xavier', **kwargs):

        super(LSTMCompositionalLayer, self).__init__(**kwargs)

        self.batch_size = batch_size
        self.num_subwords = num_subwords # number of subwords which make up a word
        self.num_words = num_words  # number of words in the sentence
        self.subword_embedding_size = subword_embedding_size
        self.input_vocab_size = input_vocab_size
        self.subword_RNN_hidden_state_size = subword_RNN_hidden_state_size
        self.table_width = table_width

        # create the look up table
        self.lookup = LookupTable(length=self.input_vocab_size, dim=self.subword_embedding_size, name='input_lookup')
        self.lookup.weights_init = Uniform(width=table_width)
        self.lookup.biases_init = Constant(0)

        if init_type == 'xavier':
            linear_init = XavierInitializationOriginal(self.subword_embedding_size, self.subword_RNN_hidden_state_size)
            lstm_init = XavierInitializationOriginal(self.subword_embedding_size, self.subword_RNN_hidden_state_size)
        else:  # default is gaussian
            linear_init = IsotropicGaussian()
            lstm_init = IsotropicGaussian()

        # The `inputs` are then split in this order: Input gates, forget gates, cells and output gates
        self.linear_forward = Linear(input_dim=self.subword_embedding_size, output_dim=self.subword_RNN_hidden_state_size * 4,
                                     name='linear_forward', weights_init=linear_init, biases_init=Constant(0.0))

        self.compositional_subword_to_word_RNN_forward = LSTM(
            dim=self.subword_RNN_hidden_state_size, activation=Tanh(), name='subword_RNN_forward',
            weights_init=lstm_init, biases_init=Constant(0.0))

        self.children = [self.lookup, self.linear_forward, self.compositional_subword_to_word_RNN_forward]
Example #26
    def __init__(self, image_shape, patch_shape, hidden_dim,
                 n_spatial_dims, whatwhere_interaction, prefork_area_transform,
                 postmerge_area_transform, patch_transform, batch_normalize,
                 response_transform, location_std, scale_std, cutoff,
                 batched_window, initargs, emitter, **kwargs):
        self.rnn = LSTM(activation=Tanh(),
                        dim=hidden_dim,
                        name="recurrent",
                        weights_init=IsotropicGaussian(1e-4),
                        biases_init=Constant(0))
        self.locator = masonry.Locator(hidden_dim, n_spatial_dims,
                                       area_transform=prefork_area_transform,
                                       location_std=location_std,
                                       scale_std=scale_std,
                                       **initargs)
        self.cropper = crop.LocallySoftRectangularCropper(
            n_spatial_dims=n_spatial_dims,
            image_shape=image_shape, patch_shape=patch_shape,
            kernel=crop.Gaussian(), cutoff=cutoff,
            batched_window=batched_window)
        self.merger = masonry.Merger(
            patch_transform=patch_transform,
            area_transform=postmerge_area_transform,
            response_transform=response_transform,
            n_spatial_dims=n_spatial_dims,
            whatwhere_interaction=whatwhere_interaction,
            batch_normalize=batch_normalize,
            **initargs)
        self.attention = masonry.SpatialAttention(
            self.locator, self.cropper, self.merger,
            name="sa")
        self.emitter = emitter
        self.model = masonry.RecurrentAttentionModel(
            self.rnn, self.attention, self.emitter,
            name="ram")
Example #27
def bilstm_layer(in_dim, inp, h_dim, n):
    linear = Linear(input_dim=in_dim, output_dim=h_dim * 4, name='linear' + str(n)+inp.name)
    lstm = LSTM(dim=h_dim, name='lstm' + str(n)+inp.name)
    bilstm = Bidirectional(prototype=lstm)
    bilstm.name = 'bilstm' + str(n) + inp.name
    initialize([linear, bilstm])
    return bilstm.apply(linear.apply(inp))[0]
Example #28
    def __init__(self, config, **kwargs):
        super(Model, self).__init__(**kwargs)
        self.config = config

        self.pre_context_embedder = ContextEmbedder(config.pre_embedder, name='pre_context_embedder')
        self.post_context_embedder = ContextEmbedder(config.post_embedder, name='post_context_embedder')

        in1 = 2 + sum(x[2] for x in config.pre_embedder.dim_embeddings)
        self.input_to_rec = MLP(activations=[Tanh()], dims=[in1, config.hidden_state_dim], name='input_to_rec')

        self.rec = LSTM(
                dim = config.hidden_state_dim,
                name = 'recurrent'
            )

        in2 = config.hidden_state_dim + sum(x[2] for x in config.post_embedder.dim_embeddings)
        self.rec_to_output = MLP(activations=[Tanh()], dims=[in2, 2], name='rec_to_output')

        self.sequences = ['latitude', 'latitude_mask', 'longitude']
        self.context = self.pre_context_embedder.inputs + self.post_context_embedder.inputs
        self.inputs = self.sequences + self.context
        self.children = [ self.pre_context_embedder, self.post_context_embedder, self.input_to_rec, self.rec, self.rec_to_output ]

        self.initial_state_ = shared_floatx_zeros((config.hidden_state_dim,),
                name="initial_state")
        self.initial_cells = shared_floatx_zeros((config.hidden_state_dim,),
                name="initial_cells")
Example #29
    def __init__(self, image_feature_dim, embedding_dim, **kwargs):
        super(Encoder, self).__init__(**kwargs)

        self.image_embedding = Linear(
              input_dim=image_feature_dim
            , output_dim=embedding_dim
            # , weights_init=IsotropicGaussian(0.02)
            # , biases_init=Constant(0.)
            , name="image_embedding"
            )

        self.to_inputs = Linear(
              input_dim=embedding_dim
            , output_dim=embedding_dim*4 # gate_inputs = vstack(input, forget, cell, hidden)
            # , weights_init=IsotropicGaussian(0.02)
            # , biases_init=Constant(0.)
            , name="to_inputs"
            )

        # This dim does not have to match embedding_dim; the choice is fairly arbitrary
        self.transition = LSTM(
            dim=embedding_dim, name="transition")

        self.children = [ self.image_embedding
                        , self.to_inputs
                        , self.transition
                        ]
Example #30
class LinearLSTM(Initializable):
    def __init__(self,
                 input_dim,
                 output_dim,
                 lstm_dim,
                 print_intermediate=False,
                 print_attrs=['__str__'],
                 **kwargs):
        super(LinearLSTM, self).__init__(**kwargs)

        self.x_to_h = Linear(input_dim,
                             lstm_dim * 4,
                             name='x_to_h',
                             weights_init=IsotropicGaussian(),
                             biases_init=Constant(0.0))
        self.lstm = LSTM(lstm_dim,
                         name='lstm',
                         weights_init=IsotropicGaussian(),
                         biases_init=Constant(0.0))
        self.h_to_o = Linear(lstm_dim,
                             output_dim,
                             name='h_to_o',
                             weights_init=IsotropicGaussian(),
                             biases_init=Constant(0.0))

        self.children = [self.x_to_h, self.lstm, self.h_to_o]

        self.print_intermediate = print_intermediate
        self.print_attrs = print_attrs

    @application
    def apply(self, source):

        x_linear = self.x_to_h.apply(
            source.reshape(
                (source.shape[1], source.shape[0], source.shape[2])))
        x_linear.name = 'x_linear'
        if self.print_intermediate:
            x_linear = Print(message='x_linear info',
                             attrs=self.print_attrs)(x_linear)

        h, c = self.lstm.apply(x_linear)
        if self.print_intermediate:
            h = Print(message="hidden states info", attrs=self.print_attrs)(h)

        y_hat = self.h_to_o.apply(h)
        y_hat.name = 'y_hat'
        if self.print_intermediate:
            y_hat = Print(message="y_hat info", attrs=self.print_attrs)(y_hat)

        return y_hat

    def initialize(self):
        for child in self.children:
            child.initialize()

    def reset_allocation(self):
        for child in self.children:
            child.allocated = False
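A hedged usage sketch for the class above (the input shape is an assumption; note that `apply` reshapes its input so that the first axis seen by the LSTM has length `source.shape[1]`):

import numpy
import theano
from theano import tensor

source = tensor.tensor3('source')
brick = LinearLSTM(input_dim=3, output_dim=2, lstm_dim=5)
brick.initialize()

y_hat = brick.apply(source)
f = theano.function([source], y_hat)
# With a (4, 7, 3) input, apply() reshapes it to (7, 4, 3), so y_hat is (7, 4, 2).
print(f(numpy.ones((4, 7, 3), dtype=theano.config.floatX)).shape)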
Example #31
    def __init__(self, input1_size, input2_size, lookup1_dim=200, lookup2_dim=200, hidden_size=512):
        self.hidden_size = hidden_size
        self.input1_size = input1_size
        self.input2_size = input2_size
        self.lookup1_dim = lookup1_dim
        self.lookup2_dim = lookup2_dim

        x1 = tensor.lmatrix('durations')
        x2 = tensor.lmatrix('syllables')
        y = tensor.lmatrix('pitches')

        lookup1 = LookupTable(dim=self.lookup1_dim, length=self.input1_size, name='lookup1',
                              weights_init=initialization.Uniform(width=0.01),
                              biases_init=Constant(0))
        lookup1.initialize()
        lookup2 = LookupTable(dim=self.lookup2_dim, length=self.input2_size, name='lookup2',
                              weights_init=initialization.Uniform(width=0.01),
                              biases_init=Constant(0))
        lookup2.initialize()
        merge = Merge(['lookup1', 'lookup2'], [self.lookup1_dim, self.lookup2_dim], self.hidden_size,
                              weights_init=initialization.Uniform(width=0.01),
                              biases_init=Constant(0))
        merge.initialize()
        recurrent_block = LSTM(dim=self.hidden_size, activation=Tanh(),
                              weights_init=initialization.Uniform(width=0.01)) #RecurrentStack([LSTM(dim=self.hidden_size, activation=Tanh())] * 3)
        recurrent_block.initialize()
        linear = Linear(input_dim=self.hidden_size, output_dim=self.input1_size,
                              weights_init=initialization.Uniform(width=0.01),
                              biases_init=Constant(0))
        linear.initialize()
        softmax = NDimensionalSoftmax()

        l1 = lookup1.apply(x1)
        l2 = lookup2.apply(x2)
        m = merge.apply(l1, l2)
        h = recurrent_block.apply(m)
        a = linear.apply(h)

        y_hat = softmax.apply(a, extra_ndim=1)
        # ValueError: x must be 1-d or 2-d tensor of floats. Got TensorType(float64, 3D)

        self.Cost = softmax.categorical_cross_entropy(y, a, extra_ndim=1).mean()

        self.ComputationGraph = ComputationGraph(self.Cost)

        self.Model = Model(y_hat)
Example #32
    def __init__(self, image_feature_dim, embedding_dim, **kwargs):
        super(Encoder, self).__init__(**kwargs)

        self.image_embedding = Linear(input_dim=image_feature_dim,
                                      output_dim=embedding_dim,
                                      name="image_embedding")

        self.to_inputs = Linear(
            input_dim=embedding_dim,
            # times 4 because gate_inputs = vstack(input, forget, cell, hidden)
            output_dim=embedding_dim * 4,
            name="to_inputs")

        self.transition = LSTM(dim=embedding_dim, name="transition")

        self.children = [self.image_embedding, self.to_inputs, self.transition]
Example #33
    def __init__(self, dims=(88, 100, 100), **kwargs):
        super(Rnn, self).__init__(**kwargs)
        self.dims = dims

        self.input_transform = Linear(
            input_dim=dims[0],
            output_dim=dims[1],
            weights_init=IsotropicGaussian(0.01),
            # biases_init=Constant(0.0),
            use_bias=False,
            name="input_transfrom")

        self.gru_layer = SimpleRecurrent(dim=dims[1],
                                         activation=Tanh(),
                                         weights_init=IsotropicGaussian(0.01),
                                         biases_init=Constant(0.0),
                                         use_bias=True,
                                         name="gru_rnn_layer")

        # TODO: find a way to automatically set the output dim in case of lstm vs normal rnn
        self.linear_trans = Linear(input_dim=dims[1],
                                   output_dim=dims[2] * 4,
                                   weights_init=IsotropicGaussian(0.01),
                                   biases_init=Constant(0.0),
                                   use_bias=False,
                                   name="h2h_transform")

        self.lstm_layer = LSTM(dim=dims[2],
                               activation=Tanh(),
                               weights_init=IsotropicGaussian(0.01),
                               biases_init=Constant(0.0),
                               use_bias=True,
                               name="lstm_rnn_layer")

        self.out_transform = MLP(activations=[Sigmoid()],
                                 dims=[dims[2], dims[0]],
                                 weights_init=IsotropicGaussian(0.01),
                                 use_bias=True,
                                 biases_init=Constant(0.0),
                                 name="out_layer")

        self.children = [
            self.input_transform, self.gru_layer, self.linear_trans,
            self.lstm_layer, self.out_transform
        ]
Example #34
def example4():
    """LSTM -> Plante lors de l'initialisation du lstm."""

    x = tensor.tensor3('x')
    dim = 3

#    gate_inputs = theano.function([x], x * 4)
    gate_inputs = Linear(input_dim=dim, output_dim=dim * 4, name="linear",
                         weights_init=initialization.Identity(), biases_init=Constant(2))

    lstm = LSTM(dim=dim, activation=Tanh(),
                weights_init=IsotropicGaussian(), biases_init=Constant(0))
    
    gate_inputs.initialize()
    hg = gate_inputs.apply(x)
    

    #print(gate_inputs.parameters)
    #print(gate_inputs.parameters[1].get_value())
    
    lstm.initialize()
    h, cells = lstm.apply(hg)
    print(lstm.parameters)
    
    f = theano.function([x], h)
    print(f(np.ones((dim, 1, dim), dtype=theano.config.floatX)))
    print(f(np.ones((dim, 1, dim), dtype=theano.config.floatX)))
    print(f(4*np.ones((dim, 1, dim), dtype=theano.config.floatX)))
 
    print("Good Job!")


#    lstm_output = 

    #Initial State
    h0 = tensor.matrix('h0')
    c =  tensor.matrix('cells')
    h,c1 = lstm.apply(inputs=x, states=h0, cells=c) # lstm.apply(states=h0,cells=cells,inputs=gate_inputs)

    f = theano.function([x, h0, c], h)
    print("a")
    print(f(np.ones((3, 1, 3), dtype=theano.config.floatX),
            np.ones((1, 3), dtype=theano.config.floatX),
            np.ones((1, 3), dtype=theano.config.floatX))) 
Example #35
def getBidir2(input_dim,input_var):
    """ LSTM-based bidirectionnal """
    bidir = Bidirectional(weights_init=Orthogonal(),
                               prototype=LSTM(dim=input_dim, name='lstm'))
    #bidir.allocate()
    bidir.initialize()
    h = bidir.apply(input_var)

    net = add_softmax_layer(h, input_dim, 2)

    return net
Example #36
    def __init__(self, dim, activation=None, depth=2, name=None,
                 lstm_name=None, **kwargs):
        super(LSTMstack, self).__init__(name=name, **kwargs)
        # use the name already processed by the superclass
        name = self.name
        self.dim = dim

        self.children = []
        self.depth = depth
        for d in range(self.depth):
            layer_node = LSTM(dim, activation, name=lstm_name)
            layer_node.name = '%s_%s_%d'%(name, layer_node.name, d)

            if d > 0:
                # convert states of previous layer to inputs of new layer
                layer_name = '%s_%d_%d'%(name, d-1, d)
                input_dim = layer_node.get_dim('inputs')
                self.children.append(Linear(dim, input_dim,
                                            use_bias=True,
                                            name=layer_name))
            self.children.append(layer_node)
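The `layer_node.get_dim('inputs')` call above is what makes the inter-layer Linear come out as dim -> 4 * dim: for Blocks' LSTM brick the 'inputs' dimension is four times the state dimension. A quick probe (illustrative):

from blocks.bricks.recurrent import LSTM

lstm = LSTM(dim=10, name='probe')
print(lstm.get_dim('inputs'))  # 40: input, forget, cell and output gate pre-activations stacked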
Example #37
    def __init__(self, layers_no, dim, alphabet_size, batch_size):
        # characters -> 1-of-N embedder -> N-to-dim -> LSTM#0 -> ... -> LSTM#(layers_no-1) -> dim-to-N -> softmax
        # TODO define the error (loss)

        # TODO first_resizer

        # LSTM stack
        self.stack = []
        lstms = [LSTM(dim=dim) for _ in range(layers_no)]
        for lstm in lstms:
            state, cell = lstm.initial_states(batch_size)
            self.stack.append((lstm, state, cell))
Example #38
def make_bidir_lstm_stack(seq, seq_dim, mask, sizes, skip=True, name=''):
    bricks = []

    curr_dim = [seq_dim]
    curr_hidden = [seq]

    hidden_list = []
    for k, dim in enumerate(sizes):
        fwd_lstm_ins = [Linear(input_dim=d, output_dim=4*dim, name='%s_fwd_lstm_in_%d_%d'%(name,k,l)) for l, d in enumerate(curr_dim)]
        fwd_lstm = LSTM(dim=dim, activation=Tanh(), name='%s_fwd_lstm_%d'%(name,k))

        bwd_lstm_ins = [Linear(input_dim=d, output_dim=4*dim, name='%s_bwd_lstm_in_%d_%d'%(name,k,l)) for l, d in enumerate(curr_dim)]
        bwd_lstm = LSTM(dim=dim, activation=Tanh(), name='%s_bwd_lstm_%d'%(name,k))

        bricks = bricks + [fwd_lstm, bwd_lstm] + fwd_lstm_ins + bwd_lstm_ins

        fwd_tmp = sum(x.apply(v) for x, v in zip(fwd_lstm_ins, curr_hidden))
        bwd_tmp = sum(x.apply(v) for x, v in zip(bwd_lstm_ins, curr_hidden))
        fwd_hidden, _ = fwd_lstm.apply(fwd_tmp, mask=mask)
        bwd_hidden, _ = bwd_lstm.apply(bwd_tmp[::-1], mask=mask[::-1])
        hidden_list = hidden_list + [fwd_hidden, bwd_hidden]
        if skip:
            curr_hidden = [seq, fwd_hidden, bwd_hidden[::-1]]
            curr_dim = [seq_dim, dim, dim]
        else:
            curr_hidden = [fwd_hidden, bwd_hidden[::-1]]
            curr_dim = [dim, dim]

    return bricks, hidden_list
Example #39
    def __init__(self, input_dim, dim, **kwargs):
        super(CoreNetwork, self).__init__(**kwargs)
        self.input_dim = input_dim
        self.dim = dim
        self.lstm = LSTM(dim=dim, name=self.name + '_lstm',
                         weights_init=self.weights_init,
                         biases_init=self.biases_init)

        self.proj = Linear(input_dim=input_dim, output_dim=dim*4,
                           name=self.name + '_proj',
                           weights_init=self.weights_init,
                           biases_init=self.biases_init)
        self.children = [self.lstm, self.proj]
Example #40
    def __init__(self, dimension, input_size, embed_input=False, **kwargs):
        super(LSTMEncoder, self).__init__(**kwargs)
        if embed_input:
            self.embedder = LookupTable(input_size, dimension)
        else:
            self.embedder = Linear(input_size, dimension)
        self.fork = Fork(['inputs'],
                         dimension,
                         output_dims=[dimension],
                         prototype=Linear(dimension, 4 * dimension))
        encoder = Bidirectional(LSTM(dim=dimension, activation=Tanh()))

        self.encoder = encoder
        self.children = [encoder, self.embedder, self.fork]
Example #41
    def setUp(self):
        n_iter = 2

        x_dim = 8
        z_dim = 10
        dec_dim = 12
        enc_dim = 16

        read_dim = 2 * x_dim

        rnninits = {
            #'weights_init': Orthogonal(),
            'weights_init': IsotropicGaussian(0.01),
            'biases_init': Constant(0.),
        }
        inits = {
            #'weights_init': Orthogonal(),
            'weights_init': IsotropicGaussian(0.01),
            'biases_init': Constant(0.),
        }

        reader = Reader(x_dim=x_dim, dec_dim=dec_dim, **inits)
        writer = Writer(input_dim=dec_dim, output_dim=x_dim, **inits)

        encoder_rnn = LSTM(dim=enc_dim, name="RNN_enc", **rnninits)
        decoder_rnn = LSTM(dim=dec_dim, name="RNN_dec", **rnninits)
        encoder_mlp = MLP([Identity()], [(read_dim + dec_dim), 4 * enc_dim],
                          name="MLP_enc",
                          **inits)
        decoder_mlp = MLP([Identity()], [z_dim, 4 * dec_dim],
                          name="MLP_dec",
                          **inits)
        q_sampler = Qsampler(input_dim=enc_dim, output_dim=z_dim, **inits)

        self.draw = DrawModel(n_iter, reader, encoder_mlp, encoder_rnn,
                              q_sampler, decoder_mlp, decoder_rnn, writer)
        self.draw.initialize()
Example #42
    def __init__(self, word_dim, hidden_dim):
        self.forward_lstm= LSTM(hidden_dim,
                                name='question_forward_lstm',
                                weights_init=IsotropicGaussian(0.01),
                                biases_init=Constant(0))
        self.backward_lstm= LSTM(hidden_dim,
                                 name='question_backward_lstm',
                                 weights_init=IsotropicGaussian(0.01),
                                 biases_init=Constant(0))
        self.x_to_h_forward = Linear(word_dim,
                                     hidden_dim * 4,
                                     name='word_x_to_h_forward',
                                     weights_init=IsotropicGaussian(0.01),
                                     biases_init=Constant(0))
        self.x_to_h_backward = Linear(word_dim,
                                      hidden_dim * 4,
                                      name='word_x_to_h_backward',
                                      weights_init=IsotropicGaussian(0.01),
                                      biases_init=Constant(0))

        self.forward_lstm.initialize()
        self.backward_lstm.initialize()
        self.x_to_h_forward.initialize()
        self.x_to_h_backward.initialize()
Example #43
    def setUp(self):
        depth = 4
        self.depth = depth
        dim = 3  # don't change, hardwired in the code
        transitions = [LSTM(dim=dim) for _ in range(depth)]
        self.stack0 = RecurrentStack(transitions,
                                     weights_init=Constant(2),
                                     biases_init=Constant(0))
        self.stack0.initialize()

        self.stack2 = RecurrentStack(transitions,
                                     weights_init=Constant(2),
                                     biases_init=Constant(0),
                                     skip_connections=True)
        self.stack2.initialize()
Example #44
    def build_theano_functions(self) :
        #import pdb ; pdb.set_trace()
        x = T.fmatrix('x')
        s = T.fvector('s')
        mu = T.fvector('mu')
        mu = T.reshape(mu,(self.number_of_mix,1))
        pi = T.fvector('pi')

        lstm = LSTM(
            dim=self.input_dim // 4,
            weights_init=IsotropicGaussian(0.5),
            biases_init=Constant(1))
        lstm.initialize()
        h, c = lstm.apply(x)
        h = h[0][0][-1]

        LL = T.sum(pi*(1./(T.sqrt(2.*np.pi)*s))*T.exp(\
            -0.5*(h-mu)**2/T.reshape(s,(self.number_of_mix,1))**2.).sum(axis=1))
        cost = -T.log(LL)

        #cg = ComputationGraph(cost)
        #self.cg = cg
        #parameters = cg.parameters
        model = Model(cost)
        self.model = model
        parameters = model.parameters

        grads = T.grad(cost, parameters)
        updates = []
        for i in range(len(grads)) :
            updates.append(tuple([parameters[i], parameters[i] - self.lr*grads[i]]))

        gradf = theano.function([x,s,mu,pi],[cost],updates=updates)
        f = theano.function([x],[h])

        return gradf, f
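
For reference, the scalar cost assembled above is the negative log-likelihood of a K-component univariate Gaussian mixture evaluated at the last hidden state h, with K = self.number_of_mix and pi, mu, s the mixture weights, means and standard deviations:

    \mathrm{cost} \;=\; -\log \sum_{k=1}^{K} \pi_k \,
        \frac{1}{\sqrt{2\pi}\,\sigma_k}
        \exp\!\left(-\frac{(h - \mu_k)^2}{2\sigma_k^2}\right)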
Example No. 45
class Encoder(Initializable):

    def __init__(self, image_feature_dim, embedding_dim, **kwargs):
        super(Encoder, self).__init__(**kwargs)

        self.image_embedding = Linear(
              input_dim=image_feature_dim
            , output_dim=embedding_dim
            # , weights_init=IsotropicGaussian(0.02)
            # , biases_init=Constant(0.)
            , name="image_embedding"
            )

        self.to_inputs = Linear(
              input_dim=embedding_dim
            , output_dim=embedding_dim*4 # times 4: the gate pre-activations are stacked as (input, forget, cell, output)
            # , weights_init=IsotropicGaussian(0.02)
            # , biases_init=Constant(0.)
            , name="to_inputs"
            )

        # The LSTM dim does not have to equal embedding_dim; it is kept equal here for simplicity
        self.transition = LSTM(
            dim=embedding_dim, name="transition")

        self.children = [ self.image_embedding
                        , self.to_inputs
                        , self.transition
                        ]

    @application(inputs=['image_vects', 'word_vects'], outputs=['image_embedding', 'sentence_embedding'])   
    def apply(self, image_vects, word_vects):

        image_embedding = self.image_embedding.apply(image_vects)

        # inputs = word_vects
        inputs = self.to_inputs.apply(word_vects)
        inputs = inputs.dimshuffle(1, 0, 2)
        hidden, cells = self.transition.apply(inputs=inputs, mask=None)

        # the last hidden state represents the accumulation of all the words (i.e. the sentence)
        # grab all batches, grab the last value representing accumulation of the sequence, grab all features
        sentence_embedding = hidden[-1]
        # sentence_embedding = inputs.mean(axis=0)
        return image_embedding, sentence_embedding
class Encoder(Initializable):

    def __init__(self, image_feature_dim, embedding_dim, **kwargs):
        super(Encoder, self).__init__(**kwargs)

        self.image_embedding = Linear(
              input_dim=image_feature_dim
            , output_dim=embedding_dim
            , name="image_embedding"
            )

        self.to_inputs = Linear(
              input_dim=embedding_dim
            , output_dim=embedding_dim*4 # times 4: the gate pre-activations are stacked as (input, forget, cell, output)
            , name="to_inputs"
            )

        self.transition = LSTM(
            dim=embedding_dim, name="transition")

        self.children = [ self.image_embedding
                        , self.to_inputs
                        , self.transition
                        ]

    @application(
          inputs=['image_vects', 'word_vects']
        , outputs=['image_embedding', 'sentence_embedding']
        )   
    def apply(self, image_vects, word_vects):

        image_embedding = self.image_embedding.apply(image_vects)

        inputs = self.to_inputs.apply(word_vects)
        
        # shuffle dimensions to correspond to (sequence, batch, features)
        inputs = inputs.dimshuffle(1, 0, 2)
        
        hidden, cells = self.transition.apply(inputs=inputs, mask=None)

        # last hidden state represents the accumulation of word embeddings 
        # (i.e. the sentence embedding)
        sentence_embedding = hidden[-1]

        return image_embedding, sentence_embedding
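
A hedged usage sketch for the Encoder above; the feature sizes (4096, 300) and the variable names are placeholders chosen for illustration, not values from the original code:

import theano
from theano import tensor
from blocks.initialization import IsotropicGaussian, Constant

# image_vects: (batch, image_feature_dim); word_vects: (batch, num_words, embedding_dim)
image_vects = tensor.matrix('image_vects')
word_vects = tensor.tensor3('word_vects')

encoder = Encoder(image_feature_dim=4096, embedding_dim=300,
                  weights_init=IsotropicGaussian(0.02),
                  biases_init=Constant(0.))
encoder.initialize()

image_emb, sentence_emb = encoder.apply(image_vects, word_vects)
embed = theano.function([image_vects, word_vects], [image_emb, sentence_emb])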
Example No. 48
    def __init__(
        self,
        input_dim,
        state_dim,
        activation=Tanh(),
        state_weights_init=None,
        input_weights_init=None,
        biases_init=init.Constant(0),
        **kwargs
    ):
        super(LSTMLayer, self).__init__(biases_init=biases_init, **kwargs)
        if state_weights_init is None:
            state_weights_init = init.IsotropicGaussian(0.01)
        if input_weights_init is None:
            input_weights_init = init.IsotropicGaussian(0.01)
        if biases_init is None:
            biases_init = init.Constant(0)

        self.input_transformation = Linear(
            input_dim=input_dim, output_dim=state_dim * 4, weights_init=input_weights_init, biases_init=biases_init
        )
        self.lstm = LSTM(dim=state_dim, activation=activation, weights_init=state_weights_init)
        self.children = [self.input_transformation, self.lstm]
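
A short, hedged sketch of driving LSTMLayer once it is built; the tensor shape, the chosen dimensions and the direct use of its two children are assumptions for illustration only:

from theano import tensor

x = tensor.tensor3('x')  # (time, batch, input_dim)
layer = LSTMLayer(input_dim=100, state_dim=50)
layer.initialize()

# the Linear child produces the 4*state_dim gate pre-activations the LSTM expects
gate_inputs = layer.input_transformation.apply(x)
states, cells = layer.lstm.apply(gate_inputs)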
Example No. 49
    def __init__(self, dims=(88, 100, 100), **kwargs):
        super(Rnn, self).__init__(**kwargs)
        self.dims = dims

        self.input_transform = Linear(input_dim=dims[0], output_dim=dims[1],
                                      weights_init=IsotropicGaussian(0.01),
                                      # biases_init=Constant(0.0),
                                      use_bias=False,
                                      name="input_transfrom")

        self.gru_layer = SimpleRecurrent(dim=dims[1], activation=Tanh(),
                                         weights_init=IsotropicGaussian(0.01),
                                         biases_init=Constant(0.0),
                                         use_bias=True,
                                         name="gru_rnn_layer")

        # TODO: find a way to automatically set the output dim in case of lstm vs normal rnn
        self.linear_trans = Linear(input_dim=dims[1], output_dim=dims[2] * 4,
                                   weights_init=IsotropicGaussian(0.01),
                                   biases_init=Constant(0.0),
                                   use_bias=False,
                                   name="h2h_transform")

        self.lstm_layer = LSTM(dim=dims[2], activation=Tanh(),
                               weights_init=IsotropicGaussian(0.01),
                               biases_init=Constant(0.0),
                               use_bias=True,
                               name="lstm_rnn_layer")

        self.out_transform = MLP(activations=[Sigmoid()], dims=[dims[2], dims[0]],
                                 weights_init=IsotropicGaussian(0.01),
                                 use_bias=True,
                                 biases_init=Constant(0.0),
                                 name="out_layer")

        self.children = [self.input_transform, self.gru_layer, self.linear_trans,
                         self.lstm_layer, self.out_transform]
Example No. 50
    def setUp(self):
        self.lstm = LSTM(dim=3, weights_init=Constant(2),
                         biases_init=Constant(0))
        self.lstm.initialize()
Example No. 51
class TestLSTM(unittest.TestCase):
    def setUp(self):
        self.lstm = LSTM(dim=3, weights_init=Constant(2),
                         biases_init=Constant(0))
        self.lstm.initialize()

    def test_one_step(self):
        h0 = tensor.matrix('h0')
        c0 = tensor.matrix('c0')
        x = tensor.matrix('x')
        h1, c1 = self.lstm.apply(x, h0, c0, iterate=False)
        next_h = theano.function(inputs=[x, h0, c0], outputs=[h1])

        h0_val = 0.1 * numpy.array([[1, 1, 0], [0, 1, 1]],
                                   dtype=theano.config.floatX)
        c0_val = 0.1 * numpy.array([[1, 1, 0], [0, 1, 1]],
                                   dtype=theano.config.floatX)
        x_val = 0.1 * numpy.array([range(12), range(12, 24)],
                                  dtype=theano.config.floatX)
        W_state_val = 2 * numpy.ones((3, 12), dtype=theano.config.floatX)
        W_cell_to_in = 2 * numpy.ones((3,), dtype=theano.config.floatX)
        W_cell_to_out = 2 * numpy.ones((3,), dtype=theano.config.floatX)
        W_cell_to_forget = 2 * numpy.ones((3,), dtype=theano.config.floatX)

        # omitting biases because they are zero
        activation = numpy.dot(h0_val, W_state_val) + x_val

        def sigmoid(x):
            return 1. / (1. + numpy.exp(-x))

        i_t = sigmoid(activation[:, :3] + c0_val * W_cell_to_in)
        f_t = sigmoid(activation[:, 3:6] + c0_val * W_cell_to_forget)
        next_cells = f_t * c0_val + i_t * numpy.tanh(activation[:, 6:9])
        o_t = sigmoid(activation[:, 9:12] +
                      next_cells * W_cell_to_out)
        h1_val = o_t * numpy.tanh(next_cells)
        assert_allclose(h1_val, next_h(x_val, h0_val, c0_val)[0],
                        rtol=1e-6)

    def test_many_steps(self):
        x = tensor.tensor3('x')
        mask = tensor.matrix('mask')
        h, c = self.lstm.apply(x, mask=mask, iterate=True)
        calc_h = theano.function(inputs=[x, mask], outputs=[h])

        x_val = (0.1 * numpy.asarray(
            list(itertools.islice(itertools.permutations(range(12)), 0, 24)),
            dtype=theano.config.floatX))
        x_val = numpy.ones((24, 4, 12),
                           dtype=theano.config.floatX) * x_val[:, None, :]
        mask_val = numpy.ones((24, 4), dtype=theano.config.floatX)
        mask_val[12:24, 3] = 0
        h_val = numpy.zeros((25, 4, 3), dtype=theano.config.floatX)
        c_val = numpy.zeros((25, 4, 3), dtype=theano.config.floatX)
        W_state_val = 2 * numpy.ones((3, 12), dtype=theano.config.floatX)
        W_cell_to_in = 2 * numpy.ones((3,), dtype=theano.config.floatX)
        W_cell_to_out = 2 * numpy.ones((3,), dtype=theano.config.floatX)
        W_cell_to_forget = 2 * numpy.ones((3,), dtype=theano.config.floatX)

        def sigmoid(x):
            return 1. / (1. + numpy.exp(-x))

        for i in range(1, 25):
            activation = numpy.dot(h_val[i-1], W_state_val) + x_val[i-1]
            i_t = sigmoid(activation[:, :3] + c_val[i-1] * W_cell_to_in)
            f_t = sigmoid(activation[:, 3:6] + c_val[i-1] * W_cell_to_forget)
            c_val[i] = f_t * c_val[i-1] + i_t * numpy.tanh(activation[:, 6:9])
            o_t = sigmoid(activation[:, 9:12] +
                          c_val[i] * W_cell_to_out)
            h_val[i] = o_t * numpy.tanh(c_val[i])
            h_val[i] = (mask_val[i - 1, :, None] * h_val[i] +
                        (1 - mask_val[i - 1, :, None]) * h_val[i - 1])
            c_val[i] = (mask_val[i - 1, :, None] * c_val[i] +
                        (1 - mask_val[i - 1, :, None]) * c_val[i - 1])

        h_val = h_val[1:]
        assert_allclose(h_val, calc_h(x_val, mask_val)[0], rtol=1e-04)

        # Also test that initial state is a parameter
        initial1, initial2 = VariableFilter(roles=[INITIAL_STATE])(
            ComputationGraph(h))
        assert is_shared_variable(initial1)
        assert is_shared_variable(initial2)
        assert {initial1.name, initial2.name} == {
            'initial_state', 'initial_cells'}
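
For reference, the hand-computed values in test_one_step follow the standard LSTM recurrence with peephole connections. With z_t = W_state h_{t-1} + x_t (the input is already projected to 4*dim) split into four dim-wide blocks z^i, z^f, z^c, z^o, and biases omitted because they are zero in this test:

    i_t = \sigma\left(z^i_t + w_{ci} \odot c_{t-1}\right)
    f_t = \sigma\left(z^f_t + w_{cf} \odot c_{t-1}\right)
    c_t = f_t \odot c_{t-1} + i_t \odot \tanh\left(z^c_t\right)
    o_t = \sigma\left(z^o_t + w_{co} \odot c_t\right)
    h_t = o_t \odot \tanh(c_t)

test_many_steps checks the same recurrence over 24 steps, additionally using the mask to carry h_t and c_t unchanged across padded positions.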
Example No. 52
    def __init__(self):
        inp = tensor.tensor3('input')
        inp = inp.dimshuffle(1,0,2)
        target = tensor.matrix('target')
        target = target.reshape((target.shape[0],))
        product = tensor.lvector('product')
        missing = tensor.eq(inp, 0)
        train_input_mean = 1470614.1
        train_input_std = 3256577.0

        trans_1 = tensor.concatenate((inp[1:,:,:],tensor.zeros((1,inp.shape[1],inp.shape[2]))), axis=0) 
        trans_2 = tensor.concatenate((tensor.zeros((1,inp.shape[1],inp.shape[2])), inp[:-1,:,:]), axis=0) 
        inp = tensor.switch(missing,(trans_1+trans_2)/2, inp)

        lookup = LookupTable(length = 352, dim=4*hidden_dim)
        product_embed= lookup.apply(product)

        salut = tensor.concatenate((inp, missing),axis =2)
        linear = Linear(input_dim=input_dim+1, output_dim=4*hidden_dim,
                        name="lstm_in")
        inter = linear.apply(salut)
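        # add the product embedding as a per-product bias to the LSTM gate
        # pre-activations, broadcast over the time axis (assumed reading of the line below)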
        inter = inter + product_embed[None,:,:] 


        lstm = LSTM(dim=hidden_dim, activation=activation_function,
                    name="lstm")

        hidden, cells = lstm.apply(inter)

        linear2 = Linear(input_dim=hidden_dim, output_dim=out_dim,
                         name="output_linear")

        pred = linear2.apply(hidden[-1])*train_input_std + train_input_mean
        pred = pred.reshape((product.shape[0],))
        
        cost = tensor.mean(abs((pred-target)/target)) 
        # Initialize all bricks
        for brick in [linear, linear2, lstm, lookup]:
            brick.weights_init = IsotropicGaussian(0.1)
            brick.biases_init = Constant(0.)
            brick.initialize()

        # Apply noise and dropout
        cg = ComputationGraph([cost])
        if w_noise_std > 0:
            noise_vars = VariableFilter(roles=[WEIGHT])(cg)
            cg = apply_noise(cg, noise_vars, w_noise_std)
        if i_dropout > 0:
            cg = apply_dropout(cg, [hidden], i_dropout)
        [cost_reg] = cg.outputs
        cost_reg += 1e-20

        if cost_reg is not cost:
            self.cost = cost
            self.cost_reg = cost_reg

            cost_reg.name = 'cost_reg'
            cost.name = 'cost'

            self.sgd_cost = cost_reg

            self.monitor_vars = [[cost, cost_reg]]
        else:
            self.cost = cost
            cost.name = 'cost'

            self.sgd_cost = cost

            self.monitor_vars = [[cost]]

        self.pred = pred
        pred.name = 'pred'
Example No. 53
def main(nvis, nhid, encoding_lstm_dim, decoding_lstm_dim, T=1):
    x = tensor.matrix('features')

    # Construct and initialize model
    encoding_mlp = MLP([Tanh()], [None, None])
    decoding_mlp = MLP([Tanh()], [None, None])
    encoding_lstm = LSTM(dim=encoding_lstm_dim)
    decoding_lstm = LSTM(dim=decoding_lstm_dim)
    draw = DRAW(nvis=nvis, nhid=nhid, T=T, encoding_mlp=encoding_mlp,
                decoding_mlp=decoding_mlp, encoding_lstm=encoding_lstm,
                decoding_lstm=decoding_lstm, biases_init=Constant(0),
                weights_init=Orthogonal())
    draw.push_initialization_config()
    encoding_lstm.weights_init = IsotropicGaussian(std=0.001)
    decoding_lstm.weights_init = IsotropicGaussian(std=0.001)
    draw.initialize()

    # Compute cost
    cost = -draw.log_likelihood_lower_bound(x).mean()
    cost.name = 'nll_upper_bound'
    model = Model(cost)

    # Datasets and data streams
    mnist_train = BinarizedMNIST('train')
    train_loop_stream = ForceFloatX(DataStream(
        dataset=mnist_train,
        iteration_scheme=SequentialScheme(mnist_train.num_examples, 100)))
    train_monitor_stream = ForceFloatX(DataStream(
        dataset=mnist_train,
        iteration_scheme=SequentialScheme(mnist_train.num_examples, 500)))
    mnist_valid = BinarizedMNIST('valid')
    valid_monitor_stream = ForceFloatX(DataStream(
        dataset=mnist_valid,
        iteration_scheme=SequentialScheme(mnist_valid.num_examples, 500)))
    mnist_test = BinarizedMNIST('test')
    test_monitor_stream = ForceFloatX(DataStream(
        dataset=mnist_test,
        iteration_scheme=SequentialScheme(mnist_test.num_examples, 500)))

    # Get parameters and monitoring channels
    computation_graph = ComputationGraph([cost])
    params = VariableFilter(roles=[PARAMETER])(computation_graph.variables)
    monitoring_channels = dict([
        ('avg_' + channel.tag.name, channel.mean()) for channel in
        VariableFilter(name='.*term$')(computation_graph.auxiliary_variables)])
    for name, channel in monitoring_channels.items():
        channel.name = name
    monitored_quantities = monitoring_channels.values() + [cost]

    # Training loop
    step_rule = RMSProp(learning_rate=1e-3, decay_rate=0.95)
    algorithm = GradientDescent(cost=cost, params=params, step_rule=step_rule)
    algorithm.add_updates(computation_graph.updates)
    main_loop = MainLoop(
        model=model, data_stream=train_loop_stream, algorithm=algorithm,
        extensions=[
            Timing(),
            SerializeMainLoop('vae.pkl', save_separately=['model']),
            FinishAfter(after_n_epochs=200),
            DataStreamMonitoring(
                monitored_quantities, train_monitor_stream, prefix="train",
                updates=computation_graph.updates),
            DataStreamMonitoring(
                monitored_quantities, valid_monitor_stream, prefix="valid",
                updates=computation_graph.updates),
            DataStreamMonitoring(
                monitored_quantities, test_monitor_stream, prefix="test",
                updates=computation_graph.updates),
            ProgressBar(),
            Printing()])
    main_loop.run()
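
A hedged example of invoking main(); the argument values (784 visible units for 28x28 binarized MNIST, 100 latent units, 256-dimensional LSTMs, 16 DRAW iterations) are illustrative assumptions, not values taken from the original script:

if __name__ == '__main__':
    main(nvis=784, nhid=100, encoding_lstm_dim=256, decoding_lstm_dim=256, T=16)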
Example No. 54
    def build_theano_functions(self):
        x = T.fmatrix('time_sequence')
        x = x.reshape((self.batch_dim, self.sequence_dim, self.time_dim))

        y = x[:,1:self.sequence_dim,:]
        x = x[:,:self.sequence_dim-1,:]

        # if we try to include the spectrogram features
        spec_dims = 0
        if self.image_size is not None :
            print "Convolution activated"
            self.init_conv()
            spec = T.ftensor4('spectrogram')
            spec_features, spec_dims = self.conv.build_conv_layers(spec)
            print "Conv final dims =", spec_dims
            spec_dims = np.prod(spec_dims)
            spec_features = spec_features.reshape(
                (self.batch_dim, self.sequence_dim-1, spec_dims))
            x = T.concatenate([x, spec_features], axis=2)

        layers_input = [x]
        dims =np.array([self.time_dim + spec_dims])
        for dim in self.lstm_layers_dim :
            dims = np.append(dims, dim)
        print "Dimensions =", dims

        # layer is just an index of the layer
        for layer in range(len(self.lstm_layers_dim)) :

            # before the cell, input, forget and output gates, x needs to
            # be transformed
            linear = Linear(dims[layer],
                            dims[layer+1]*4,
                            weights_init=Orthogonal(self.orth_scale),
                            biases_init=Constant(0),
                            name="linear"+str(layer))
            linear.initialize()
            lstm_input = linear.apply(layers_input[layer])

            # the lstm wants batch X sequence X time
            lstm = LSTM(
                dim=dims[layer+1],
                weights_init=IsotropicGaussian(mean=0.,std=0.5),
                biases_init=Constant(1),
                name="lstm"+str(layer))
            lstm.initialize()
            # hack to use Orthogonal on lstm w_state
            lstm.W_state.set_value(
                self.orth_scale*Orthogonal().generate(np.random, lstm.W_state.get_value().shape))
            h, _dummy = lstm.apply(lstm_input)

            layers_input.append(h)

        # this is where Alex Graves' paper starts
        print "Last linear transform dim :", dims[1:].sum()
        output_transform = Linear(dims[1:].sum(),
                                  self.output_dim,
                                  weights_init=Orthogonal(self.orth_scale),
                                  use_bias=False,
                                  name="output_transform")
        output_transform.initialize()
        if len(self.lstm_layers_dim) == 1 :
            print "hallo there, only one layer speaking"
            y_hat = output_transform.apply(layers_input[-1])
        else :
            y_hat = output_transform.apply(T.concatenate(layers_input[1:], axis=2))

        # transforms to find each gmm params (mu, pi, sig)
        # small hack to softmax a 3D tensor
        pis = T.reshape(
                    T.nnet.softmax(
                        T.reshape(y_hat[:,:,:self.gmm_dim], ((self.sequence_dim-1)*self.batch_dim, self.gmm_dim))),
                    (self.batch_dim, (self.sequence_dim-1), self.gmm_dim))
        sig = T.exp(y_hat[:,:,self.gmm_dim:self.gmm_dim*2])+1e-6
        mus = y_hat[:,:,self.gmm_dim*2:]

        pis = pis[:,:,:,np.newaxis]
        mus = mus[:,:,:,np.newaxis]
        sig = sig[:,:,:,np.newaxis]
        y = y[:,:,np.newaxis,:]

        y = T.patternbroadcast(y, (False, False, True, False))
        mus = T.patternbroadcast(mus, (False, False, False, True))
        sig = T.patternbroadcast(sig, (False, False, False, True))

        # sum likelihood with targets
        # see blog for this crazy Pr() = sum log sum prod
        # axes :: (batch, sequence, mixture, time)
        expo_term = -0.5*((y-mus)**2)/sig**2
        coeff = T.log(T.maximum(1./(T.sqrt(2.*np.pi)*sig), EPS))
        #coeff = T.log(1./(T.sqrt(2.*np.pi)*sig))
        sequences = coeff + expo_term
        log_sequences = T.log(pis + EPS) + T.sum(sequences, axis=3, keepdims=True)

        log_sequences_max = T.max(log_sequences, axis=2, keepdims=True)

        LL = -(log_sequences_max + T.log(EPS + T.sum(T.exp(log_sequences - log_sequences_max), axis=2, keepdims=True))).mean()
        LL.name = "summed_likelihood"

        model = Model(LL)
        self.model = model
        parameters = model.parameters

        algorithm = GradientDescent(
            cost=LL,
            parameters=model.parameters,
            step_rule=Adam())

        f = theano.function([x],[pis, sig, mus])

        return algorithm, f
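
Ignoring the EPS clipping, the summed_likelihood cost above is the negative log-likelihood of a K-component Gaussian mixture over each predicted frame of D time samples, computed with the usual log-sum-exp stabilization (here K is self.gmm_dim and D the number of time samples per frame):

    a_k = \log \pi_k + \sum_{d=1}^{D}\left[\log\frac{1}{\sqrt{2\pi}\,\sigma_k}
          - \frac{(y_d - \mu_k)^2}{2\sigma_k^2}\right]

    \mathrm{LL} = -\operatorname{mean}_{\text{batch, sequence}}
          \left(m + \log\sum_{k=1}^{K} e^{\,a_k - m}\right),
          \qquad m = \max_k a_k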
class LanguageModel(Initializable):
    """
    This takes the word embeddings from LSTMCompositionalLayer and creates sentence embeddings using an LSTM

    compositional_layer_type can be:
        1) 'BidirectionalLSTMCompositionalLayer'
        2) 'UnidirectionalLSTMCompositionalLayer'
        3) 'BaselineLSTMCompositionalLayer'

    Input is a 3d tensor with dimensions (num_words, num_subwords, batch_size) and
    a 3d mask tensor of the same shape (num_words, num_subwords, batch_size)

    All hidden state sizes are the same as the subword embedding size

    This returns a 3d tensor with dimensions of
    (num_words = num RNN states, batch_size, sentence embedding size = LM_RNN_hidden_state_size = subword_RNN_hidden_state_size * 2)
    """

    def __init__(self, batch_size, num_subwords, num_words, subword_embedding_size, input_vocab_size,
                 subword_RNN_hidden_state_size, LM_RNN_hidden_state_size, table_width=0.08,
                 compositional_layer_type='BidirectionalLSTMCompositionalLayer', init_type='xavier', **kwargs):

        super(LanguageModel, self).__init__(**kwargs)
        self.batch_size = batch_size
        self.num_subwords = num_subwords # number of subwords which make up a word
        self.num_words = num_words  # number of words in the sentence
        self.subword_embedding_size = subword_embedding_size
        self.input_vocab_size = input_vocab_size
        self.subword_RNN_hidden_state_size = subword_RNN_hidden_state_size  #i.e. word embedding size
        self.LM_RNN_hidden_state_size = LM_RNN_hidden_state_size #i.e sentence embedding size
        self.table_width = table_width

        self.name = 'Language_Model'

        if init_type == 'xavier':
            linear_init = XavierInitializationOriginal(self.subword_RNN_hidden_state_size, self.LM_RNN_hidden_state_size)
            lstm_init = XavierInitializationOriginal(self.subword_RNN_hidden_state_size, self.LM_RNN_hidden_state_size)
        else:  # default is gaussian
            linear_init = IsotropicGaussian()
            lstm_init = IsotropicGaussian()


        self.compositional_layer = None
        self.linear = None
        if compositional_layer_type == 'BidirectionalLSTMCompositionalLayer':
            self.compositional_layer =  BidirectionalLSTMCompositionalLayer(self.batch_size, self.num_subwords, self.num_words,
                                                          self.subword_embedding_size, self.input_vocab_size,
                                                          self.subword_RNN_hidden_state_size, self.table_width,
                                                          init_type=init_type, name='compositional_layer')

            if init_type == 'xavier':
                linear_init = XavierInitializationOriginal(self.subword_RNN_hidden_state_size * 2, self.LM_RNN_hidden_state_size)
                lstm_init = XavierInitializationOriginal(self.subword_RNN_hidden_state_size * 2, self.LM_RNN_hidden_state_size)
            else:  # default is gaussian
                linear_init = IsotropicGaussian()
                lstm_init = IsotropicGaussian()

            self.linear = Linear(input_dim=self.subword_RNN_hidden_state_size * 2, #  2 * for the bidirectional
                                     output_dim=self.LM_RNN_hidden_state_size * 4,
                                     name='linear', weights_init=IsotropicGaussian(), biases_init=Constant(0.0))

        elif compositional_layer_type == 'UnidirectionalLSTMCompositionalLayer':
            self.compositional_layer =  LSTMCompositionalLayer(self.batch_size, self.num_subwords, self.num_words,
                                                          self.subword_embedding_size, self.input_vocab_size,
                                                          self.subword_RNN_hidden_state_size, self.table_width,
                                                          init_type=init_type, name='compositional_layer')

            self.linear = Linear(input_dim=self.subword_RNN_hidden_state_size,
                                     output_dim=self.LM_RNN_hidden_state_size * 4,
                                     name='linear', weights_init=IsotropicGaussian(), biases_init=Constant(0.0))

        elif compositional_layer_type == 'BaselineLSTMCompositionalLayer':
            self.compositional_layer =  BaselineLSTMCompositionalLayer(self.batch_size, self.num_subwords, self.num_words,
                                                          self.subword_embedding_size, self.input_vocab_size,
                                                          self.subword_RNN_hidden_state_size, self.table_width,
                                                          init_type=init_type, name='compositional_layer')

            self.linear = Linear(input_dim=self.subword_RNN_hidden_state_size,
                                     output_dim=self.LM_RNN_hidden_state_size * 4,
                                     name='linear', weights_init=IsotropicGaussian(), biases_init=Constant(0.0))

        else:
            print('ERROR: compositional_layer_type = ' + compositional_layer_type + ' is invalid')
            sys.exit()

        # has one RNN which reads the word embeddings into a sentence embedding, or partial sentence embeddings
        self.language_model_RNN = LSTM(
            dim=self.LM_RNN_hidden_state_size, activation=Identity(), name='language_model_RNN',
            weights_init=IsotropicGaussian(), biases_init=Constant(0.0))

        self.children = [self.compositional_layer, self.linear, self.language_model_RNN]


    @application(inputs=['subword_id_input_', 'subword_id_input_mask_'], outputs=['sentence_embeddings', 'word_embeddings_mask'])
    def apply(self, subword_id_input_, subword_id_input_mask_):
        """
        subword_id_input_ is a 3d tensor with the dimensions of shape = (num_words, num_subwords, batch_size).
        It is expected as a dtype=uint16 or equivalent

        subword_id_input_mask_ is a 3d tensor with the dimensions of shape = (num_words, num_subwords, batch_size).
        It is expected as a dtype=uint8 or equivalent and has binary values of 1 when there is data and zero otherwise.

        Returned is a 3d tensor of size (num_words = num RNN states, batch_size, sentence embedding size)
        Also returned is a 1d tensor of size (batch_size) describing whether each sentence in the batch is valid or empty
        """
        word_embeddings, word_embeddings_mask = self.compositional_layer.apply(subword_id_input_, subword_id_input_mask_)
        sentence_embeddings = self.language_model_RNN.apply(
            self.linear.apply(word_embeddings), mask=word_embeddings_mask)[0] #[0] = hidden states, [1] = cells

        # sentence_embeddings_mask = word_embeddings_mask.max(axis=0).T

        return sentence_embeddings, word_embeddings_mask
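
A hedged usage sketch for LanguageModel; the sizes, dtypes and variable names below are placeholders chosen for illustration (they mirror the shapes described in the docstring):

from theano import tensor

# (num_words, num_subwords, batch_size) subword ids and matching binary mask
subword_ids = tensor.tensor3('subword_id_input_', dtype='uint16')
subword_mask = tensor.tensor3('subword_id_input_mask_', dtype='uint8')

lm = LanguageModel(batch_size=32, num_subwords=6, num_words=30,
                   subword_embedding_size=64, input_vocab_size=500,
                   subword_RNN_hidden_state_size=128,
                   LM_RNN_hidden_state_size=256)
lm.initialize()

sentence_embeddings, word_embeddings_mask = lm.apply(subword_ids, subword_mask)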
Example No. 57
class Model(Initializable):
    @lazy()
    def __init__(self, config, **kwargs):
        super(Model, self).__init__(**kwargs)
        self.config = config

        self.pre_context_embedder = ContextEmbedder(config.pre_embedder, name='pre_context_embedder')
        self.post_context_embedder = ContextEmbedder(config.post_embedder, name='post_context_embedder')

        in1 = 2 + sum(x[2] for x in config.pre_embedder.dim_embeddings)
        self.input_to_rec = MLP(activations=[Tanh()], dims=[in1, config.hidden_state_dim], name='input_to_rec')

        self.rec = LSTM(
                dim = config.hidden_state_dim,
                name = 'recurrent'
            )

        in2 = config.hidden_state_dim + sum(x[2] for x in config.post_embedder.dim_embeddings)
        self.rec_to_output = MLP(activations=[Tanh()], dims=[in2, 2], name='rec_to_output')

        self.sequences = ['latitude', 'latitude_mask', 'longitude']
        self.context = self.pre_context_embedder.inputs + self.post_context_embedder.inputs
        self.inputs = self.sequences + self.context
        self.children = [ self.pre_context_embedder, self.post_context_embedder, self.input_to_rec, self.rec, self.rec_to_output ]

        self.initial_state_ = shared_floatx_zeros((config.hidden_state_dim,),
                name="initial_state")
        self.initial_cells = shared_floatx_zeros((config.hidden_state_dim,),
                name="initial_cells")

    def _push_initialization_config(self):
        for mlp in [self.input_to_rec, self.rec_to_output]:
            mlp.weights_init = self.config.weights_init
            mlp.biases_init = self.config.biases_init
        self.rec.weights_init = self.config.weights_init

    def get_dim(self, name):
        return self.rec.get_dim(name)

    @application
    def initial_state(self, *args, **kwargs):
        return self.rec.initial_state(*args, **kwargs)

    @recurrent(states=['states', 'cells'], outputs=['destination', 'states', 'cells'], sequences=['latitude', 'longitude', 'latitude_mask'])
    def predict_all(self, latitude, longitude, latitude_mask, **kwargs):
        latitude = (latitude - data.train_gps_mean[0]) / data.train_gps_std[0]
        longitude = (longitude - data.train_gps_mean[1]) / data.train_gps_std[1]

        pre_emb = tuple(self.pre_context_embedder.apply(**kwargs))
        latitude = tensor.shape_padright(latitude)
        longitude = tensor.shape_padright(longitude)
        itr = self.input_to_rec.apply(tensor.concatenate(pre_emb + (latitude, longitude), axis=1))
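        # repeat to width 4*hidden_state_dim, the gate pre-activation size the LSTM brick expects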
        itr = itr.repeat(4, axis=1)
        (next_states, next_cells) = self.rec.apply(itr, kwargs['states'], kwargs['cells'], mask=latitude_mask, iterate=False)

        post_emb = tuple(self.post_context_embedder.apply(**kwargs))
        rto = self.rec_to_output.apply(tensor.concatenate(post_emb + (next_states,), axis=1))

        rto = (rto * data.train_gps_std) + data.train_gps_mean
        return (rto, next_states, next_cells)

    @predict_all.property('contexts')
    def predict_all_inputs(self):
        return self.context

    @application(outputs=['destination'])
    def predict(self, latitude, longitude, latitude_mask, **kwargs):
        latitude = latitude.T
        longitude = longitude.T
        latitude_mask = latitude_mask.T
        res = self.predict_all(latitude, longitude, latitude_mask, **kwargs)[0]
        return res[-1]

    @predict.property('inputs')
    def predict_inputs(self):
        return self.inputs

    @application(outputs=['cost_matrix'])
    def cost_matrix(self, latitude, longitude, latitude_mask, **kwargs):
        latitude = latitude.T
        longitude = longitude.T
        latitude_mask = latitude_mask.T

        res = self.predict_all(latitude, longitude, latitude_mask, **kwargs)[0]
        target = tensor.concatenate(
                    (kwargs['destination_latitude'].dimshuffle('x', 0, 'x'),
                     kwargs['destination_longitude'].dimshuffle('x', 0, 'x')),
                axis=2)
        target = target.repeat(latitude.shape[0], axis=0)
        ce = error.erdist(target.reshape((-1, 2)), res.reshape((-1, 2)))
        ce = ce.reshape(latitude.shape)
        return ce * latitude_mask

    @cost_matrix.property('inputs')
    def cost_matrix_inputs(self):
        return self.inputs + ['destination_latitude', 'destination_longitude']

    @application(outputs=['cost'])
    def cost(self, latitude_mask, **kwargs):
        return self.cost_matrix(latitude_mask=latitude_mask, **kwargs).sum() / latitude_mask.sum()

    @cost.property('inputs')
    def cost_inputs(self):
        return self.inputs + ['destination_latitude', 'destination_longitude']

    @application(outputs=['cost'])
    def valid_cost(self, **kwargs):
        # Only works when batch_size is 1.
        return self.cost_matrix(**kwargs)[-1,0]

    @valid_cost.property('inputs')
    def valid_cost_inputs(self):
        return self.inputs + ['destination_latitude', 'destination_longitude']