Beispiel #1
0
    def test_symbolic_shape(self):
        from lasagne.layers import InputLayer, ReshapeLayer, get_output
        x = theano.tensor.tensor3()
        batch_size, seq_len, num_features = x.shape
        l_inp = InputLayer((None, None, None))
        l_rshp2 = ReshapeLayer(l_inp, (batch_size*seq_len, [2]))

        # we cannot infer any of the output shapes because they are symbolic.
        output_shape = l_rshp2.get_output_shape_for(
            (batch_size, seq_len, num_features))
        assert output_shape == (None, None)

        output = get_output(l_rshp2, x)
        out1 = output.eval({x: np.ones((3, 5, 6), dtype='float32')})
        out2 = output.eval({x: np.ones((4, 5, 7), dtype='float32')})

        assert out1.shape == (3*5, 6)
        assert out2.shape == (4*5, 7)
Beispiel #2
0
    def test_symbolic_shape(self):
        from lasagne.layers import InputLayer, ReshapeLayer, get_output
        x = theano.tensor.tensor3()
        batch_size, seq_len, num_features = x.shape
        l_inp = InputLayer((None, None, None))
        l_rshp2 = ReshapeLayer(l_inp, (batch_size * seq_len, [2]))

        # we cannot infer any of the output shapes because they are symbolic.
        output_shape = l_rshp2.get_output_shape_for(
            (batch_size, seq_len, num_features))
        assert output_shape == (None, None)

        output = get_output(l_rshp2, x)
        out1 = output.eval({x: np.ones((3, 5, 6), dtype='float32')})
        out2 = output.eval({x: np.ones((4, 5, 7), dtype='float32')})

        assert out1.shape == (3 * 5, 6)
        assert out2.shape == (4 * 5, 7)
Beispiel #3
0
def create_lstm(input_vars, num_inputs, depth, hidden_layer_size, num_outputs):
    network = lasagne.layers.InputLayer(shape=(None, 1, 1, num_inputs),
                                        input_var=input_vars)
    #network = GaussianNoiseLayer(network, sigma=0.01)
    nonlin = lasagne.nonlinearities.rectify  # leaky_rectify
    for i in range(depth):
        network = LSTMLayer(network,
                            hidden_layer_size,
                            learn_init=True,
                            nonlinearity=nonlin)

    network = ReshapeLayer(network, (-1, hidden_layer_size))
    network = DenseLayer(network, num_outputs, nonlinearity=softmax)
    return network
Beispiel #4
0
def get_output_unit(input, incoming, dim_word):
    net = OrderedDict()
    # regress on combined projections
    net['output'] = DenseLayer(incoming,
                               num_units=dim_word,
                               nonlinearity=softmax,
                               name='output')

    # reshape back
    # n_batch, n_timesteps, n_features = input.input_var.shape
    n_batch = input.input_var.shape[0]
    net['reshape_output'] = ReshapeLayer(net.values()[-1],
                                         (n_batch, -1, n_targets),
                                         name='reshape_output')

    return net
Beispiel #5
0
def create_rnn(input_vars, num_inputs, depth, hidden_layer_size, num_outputs):
    # network = InputLayer((None, None, num_inputs), input_vars)
    network = lasagne.layers.InputLayer(shape=(None, 1, 1, num_inputs),
                                        input_var=input_vars)
    batch_size_theano, _, _, seqlen = network.input_var.shape

    network = GaussianNoiseLayer(network, sigma=0.05)
    for i in range(depth):
        network = RecurrentLayer(network,
                                 hidden_layer_size,
                                 W_hid_to_hid=GlorotUniform(),
                                 W_in_to_hid=GlorotUniform(),
                                 b=Constant(1.0),
                                 nonlinearity=lasagne.nonlinearities.tanh,
                                 learn_init=True)
    network = ReshapeLayer(network, (-1, hidden_layer_size))
    network = DenseLayer(network, num_outputs, nonlinearity=softmax)

    return network
Beispiel #6
0
def create_blstm(input_vars, mask_vars, num_inputs, depth, hidden_layer_size,
                 num_outputs):
    network = lasagne.layers.InputLayer(shape=(None, 1, 1, num_inputs),
                                        input_var=input_vars)
    mask = InputLayer((None, None), mask_vars)
    network = GaussianNoiseLayer(network, sigma=0.01)
    for i in range(depth):
        forward = LSTMLayer(network,
                            hidden_layer_size,
                            mask_input=mask,
                            learn_init=True)
        backward = LSTMLayer(network,
                             hidden_layer_size,
                             mask_input=mask,
                             learn_init=True,
                             backwards=True)
        network = ElemwiseSumLayer([forward, backward])
    network = ReshapeLayer(network, (-1, hidden_layer_size))
    network = DenseLayer(network, num_outputs, nonlinearity=softmax)
    return network
def build_nmt_encoder_decoder(dim_word=1,
                              n_embd=100,
                              n_units=500,
                              n_proj=200,
                              state=None,
                              rev_state=None,
                              context_type=None,
                              attention=False,
                              drop_p=None):
    enc = OrderedDict()
    enc['input'] = InputLayer((None, None), name='input')
    enc_mask = enc['mask'] = InputLayer((None, None), name='mask')
    enc_rev_mask = enc['rev_mask'] = InputLayer((None, None), name='rev_mask')

    enc['input_emb'] = EmbeddingLayer(enc.values()[-1],
                                      input_size=dim_word,
                                      output_size=n_embd,
                                      name='input_emb')

    ### ENCODER PART ###
    # rnn encoder unit
    hid_init = Constant(0.)
    hid_init_rev = Constant(0.)
    encoder_unit = get_rnn_unit(enc.values()[-1],
                                enc_mask,
                                enc_rev_mask,
                                hid_init,
                                hid_init_rev,
                                n_units,
                                prefix='encoder_')
    enc.update(encoder_unit)

    # context layer = decoder's initial state of shape (batch_size, num_units)
    context = enc.values()[-1]  # net['context']
    if context_type == 'last':
        enc['context2init'] = SliceLayer(context,
                                         indices=-1,
                                         axis=1,
                                         name='last_encoder_context')
    elif context_type == 'mean':
        enc['context2init'] = ExpressionLayer(context,
                                              mean_over_1_axis,
                                              output_shape='auto',
                                              name='mean_encoder_context')

    ### DECODER PART ###
    W_init2proj, b_init2proj = GlorotUniform(), Constant(0.)

    enc['init_state'] = DenseLayer(enc['context2init'],
                                   num_units=n_units,
                                   W=W_init2proj,
                                   b=b_init2proj,
                                   nonlinearity=tanh,
                                   name='decoder_init_state')
    if state is None:
        init_state = enc['init_state']
        init_state_rev = None  #if rev_state is None else init_state
        if not attention:
            # if simple attetion the context is 2D, else 3D
            context = enc['context2init']
    else:
        init_state = state
        init_state_rev = rev_state
        context = enc['context_input'] = \
            InputLayer((None, n_units), name='ctx_input')
    # (batch_size, nfeats)

    # (batch_size, valid ntsteps)
    enc['target'] = InputLayer((None, None), name='target')
    dec_mask = enc['target_mask'] = InputLayer((None, None),
                                               name='target_mask')

    enc['target_emb'] = EmbeddingLayer(enc.values()[-1],
                                       input_size=dim_word,
                                       output_size=n_embd,
                                       name='target_emb')
    prevdim = n_embd
    prev2rnn = enc.values()[-1]  # it's either emb or prev2rnn/noise

    decoder_unit = get_rnn_unit(prev2rnn,
                                dec_mask,
                                None,
                                init_state,
                                None,
                                n_units,
                                prefix='decoder_',
                                context=context,
                                attention=attention)
    enc.update(decoder_unit)

    if attention:
        ctxs = enc.values()[-1]
        ctxs_shape = ctxs.output_shape

        def get_ctx(x):
            return ctxs.ctx

        context = enc['context'] = ExpressionLayer(ctxs,
                                                   function=get_ctx,
                                                   output_shape=ctxs_shape,
                                                   name='context')

    # return all values'
    # reshape for feed-forward layer
    # 2D shapes of (batch_size * num_steps, num_units/num_feats)
    enc['rnn2proj'] = rnn2proj = ReshapeLayer(enc.values()[-1], (-1, n_units),
                                              name='flatten_rnn2proj')

    enc['prev2proj'] = prev2proj = ReshapeLayer(prev2rnn, (-1, prevdim),
                                                name='flatten_prev')

    if isinstance(context, ExpressionLayer):
        ctx2proj = enc['ctx2proj'] = ReshapeLayer(context,
                                                  (-1, ctxs_shape[-1]),
                                                  name='flatten_ctxs')
    else:
        ctx2proj = context

    # load shared parameters
    W_rnn2proj, b_rnn2proj = GlorotUniform(), Constant(0.)
    W_prev2proj, b_prev2proj = GlorotUniform(), Constant(0.)
    W_ctx2proj, b_ctx2proj = GlorotUniform(), Constant(0.)

    # perturb rnn-to-projection by noise
    if drop_p is not None:
        rnn2proj = enc['noise_rnn2proj'] = DropoutLayer(rnn2proj,
                                                        sigma=drop_p,
                                                        name='noise_rnn2proj')

        prev2proj = enc['drop_prev2proj'] = DropoutLayer(prev2proj,
                                                         sigma=drop_p,
                                                         name='drop_prev2proj')

        ctx2proj = enc['noise_ctx2proj'] = DropoutLayer(ctx2proj,
                                                        sigma=drop_p,
                                                        name='noise_ctx2proj')

    # project rnn
    enc['rnn_proj'] = DenseLayer(rnn2proj,
                                 num_units=n_proj,
                                 nonlinearity=linear,
                                 W=W_rnn2proj,
                                 b=b_rnn2proj,
                                 name='rnn_proj')

    # project raw targets
    enc['prev_proj'] = DenseLayer(prev2proj,
                                  num_units=n_proj,
                                  nonlinearity=linear,
                                  W=W_prev2proj,
                                  b=b_prev2proj,
                                  name='prev_proj')

    # project context
    enc['ctx_proj'] = DenseLayer(ctx2proj,
                                 num_units=n_proj,
                                 nonlinearity=linear,
                                 W=W_ctx2proj,
                                 b=b_ctx2proj,
                                 name='ctx_proj')

    # reshape back for merging
    n_batch = enc['input'].input_var.shape[0]
    rnn2merge = enc['rnn2merge'] = ReshapeLayer(enc['rnn_proj'],
                                                (n_batch, -1, n_proj),
                                                name='reshaped_rnn2proj')

    prev2merge = enc['prev2merge'] = ReshapeLayer(enc['prev_proj'],
                                                  (n_batch, -1, n_proj),
                                                  name='reshaped_prev')

    if isinstance(context, ExpressionLayer):
        ctx2merge = ReshapeLayer(enc['ctx_proj'], (n_batch, -1, n_proj),
                                 name='reshaped_prev')
    else:
        ctx2merge = enc['ctx2merge'] = DimshuffleLayer(enc['ctx_proj'],
                                                       pattern=(0, 'x', 1),
                                                       name='reshaped_context')

    # combine projections into shape (batch_size, n_steps, n_proj)
    enc['proj_merge'] = ElemwiseMergeLayer([rnn2merge, prev2merge, ctx2merge],
                                           merge_function=tanh_add,
                                           name='proj_merge')

    # reshape for output regression projection
    enc['merge2proj'] = ReshapeLayer(enc.values()[-1], (-1, n_proj),
                                     name='flatten_proj_merge')

    # perturb concatenated regressors by noise
    if drop_p is not None:
        # if noise_type == 'binary':
        enc['noise_output'] = DropoutLayer(enc.values()[-1],
                                           p=drop_p,
                                           name='noise_output')

    # regress on combined (perturbed) projections
    out = get_output_unit(enc['target'], enc.values()[-1], dim_word)
    enc.update(out)  # update graph

    return enc