Example #1
    def test_tuple_shape(self, func, input_layer, ExpressionLayer):
        import numpy as np
        from lasagne.layers.helper import get_output

        X, expected = self.np_result(func, input_layer)
        # output_shape is given as a fixed tuple
        layer = ExpressionLayer(input_layer, func, output_shape=expected.shape)
        assert layer.get_output_shape_for(X.shape) == expected.shape

        # the symbolic output must match the NumPy reference result
        output = get_output(layer, X).eval()
        assert np.allclose(output, expected)
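For reference, a minimal standalone sketch of the same pattern, with output_shape passed as a fixed tuple. The slicing expression and layer sizes are invented for illustration; only InputLayer, ExpressionLayer and get_output are the actual Lasagne API.

import numpy as np
from lasagne.layers import InputLayer, ExpressionLayer, get_output

l_in = InputLayer((None, 10))
# keep every other feature column, so the output shape is known up front
l_half = ExpressionLayer(l_in, lambda x: x[:, ::2], output_shape=(None, 5))
print(l_half.output_shape)                 # (None, 5)

X = np.random.rand(4, 10).astype('float32')
print(get_output(l_half, X).eval().shape)  # (4, 5)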
Example #2
    def test_callable_shape(self, func, input_layer, ExpressionLayer):
        import numpy as np
        from lasagne.layers.helper import get_output

        X, expected = self.np_result(func, input_layer)

        def get_shape(input_shape):
            # derive the output shape by pushing a dummy array through func
            return func(np.empty(shape=input_shape)).shape

        # output_shape is given as a callable on the input shape
        layer = ExpressionLayer(input_layer, func, output_shape=get_shape)
        assert layer.get_output_shape_for(X.shape) == expected.shape

        output = get_output(layer, X).eval()
        assert np.allclose(output, expected)
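Outside the test harness, the callable form looks like this: output_shape is a function that maps the input shape to the output shape. The pooling expression below is a hypothetical example, not part of the test suite.

import numpy as np
from lasagne.layers import InputLayer, ExpressionLayer, get_output

def global_pool(x):
    # average over the last (third) axis
    return x.mean(axis=2)

def global_pool_shape(input_shape):
    # drop the pooled axis, keep batch and channel dimensions
    return input_shape[:2]

l_in = InputLayer((None, 16, 50))
l_pool = ExpressionLayer(l_in, global_pool, output_shape=global_pool_shape)
print(l_pool.output_shape)                 # (None, 16)

X = np.random.rand(2, 16, 50).astype('float32')
print(get_output(l_pool, X).eval().shape)  # (2, 16)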
Example #3
    def test_nones_shape(self, func, input_layer_nones, ExpressionLayer):
        import numpy as np

        input_shape = input_layer_nones.output_shape
        # unknown (None) dimensions become size 0 in the dummy input array
        np_shape = tuple(0 if s is None else s for s in input_shape)
        X = np.random.uniform(-1, 1, np_shape)
        expected = func(X)
        # zero-sized dimensions map back to None in the expected layer shape
        expected_shape = tuple(s if s else None for s in expected.shape)

        # output_shape as a fixed tuple
        layer = ExpressionLayer(input_layer_nones,
                                func,
                                output_shape=expected_shape)
        assert layer.get_output_shape_for(input_shape) == expected_shape

        # output_shape as a callable
        def get_shape(input_shape):
            return expected_shape

        layer = ExpressionLayer(input_layer_nones,
                                func,
                                output_shape=get_shape)
        assert layer.get_output_shape_for(input_shape) == expected_shape

        # output_shape inferred automatically
        layer = ExpressionLayer(input_layer_nones, func, output_shape='auto')
        assert layer.get_output_shape_for(input_shape) == expected_shape
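A short sketch of the output_shape='auto' case this test exercises: Lasagne substitutes 0 for every unknown (None) dimension, pushes a dummy tensor through the expression, and maps zero-sized axes back to None. The mean expression and layer sizes are illustrative assumptions.

from lasagne.layers import InputLayer, ExpressionLayer

l_in = InputLayer((None, 8, 5))
l_auto = ExpressionLayer(l_in, lambda x: x.mean(axis=2), output_shape='auto')
print(l_auto.output_shape)   # (None, 8)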
Example #4
def residual_block(
    l,
    batch_norm_alpha,
    batch_norm_epsilon,
    nonlinearity,
    survival_prob,
    add_after_nonlin,
    reduction_method,
    reduction_pool_mode,
    increase_units_factor=None,
    half_time=False,
    projection=False,
):
    assert 0 <= survival_prob <= 1
    input_num_filters = l.output_shape[1]
    if increase_units_factor is not None:
        out_num_filters = int(input_num_filters * increase_units_factor)
        assert (out_num_filters - input_num_filters) % 2 == 0, (
            "Need even "
            "number of extra channels in order to be able to pad correctly")
    else:
        out_num_filters = input_num_filters

    if (not half_time) or (reduction_method == 'conv'):
        stack_1 = batch_norm(Conv2DLayer(l,
                                         num_filters=out_num_filters,
                                         filter_size=(3, 3),
                                         stride=(1, 1),
                                         nonlinearity=nonlinearity,
                                         pad='same',
                                         W=lasagne.init.HeNormal(gain='relu')),
                             epsilon=batch_norm_epsilon,
                             alpha=batch_norm_alpha)
    else:
        assert half_time and reduction_method == 'pool'
        stack_1 = Pool2DLayer(l,
                              pool_size=(3, 1),
                              stride=(1, 1),
                              pad=(1, 0),
                              mode=reduction_pool_mode)
        # 1x1 conv here, therefore the stride can be applied later without
        # problems; otherwise the stride would have to be applied here and an
        # extra condition would be needed later (only reshape with stride
        # in case of reduction method 'conv')...
        stack_1 = batch_norm(Conv2DLayer(stack_1,
                                         num_filters=out_num_filters,
                                         filter_size=(1, 1),
                                         stride=(1, 1),
                                         nonlinearity=nonlinearity,
                                         pad='same',
                                         W=lasagne.init.HeNormal(gain='relu')),
                             epsilon=batch_norm_epsilon,
                             alpha=batch_norm_alpha)

    if half_time:
        stack_1 = StrideReshapeLayer(stack_1, n_stride=2)
    stack_2 = batch_norm(Conv2DLayer(stack_1,
                                     num_filters=out_num_filters,
                                     filter_size=(3, 3),
                                     stride=(1, 1),
                                     nonlinearity=None,
                                     pad='same',
                                     W=lasagne.init.HeNormal(gain='relu')),
                         epsilon=batch_norm_epsilon,
                         alpha=batch_norm_alpha)

    # add shortcut connections
    shortcut = l
    if half_time:
        # Note: since we are only reshaping, this is fine both for the later
        # identity and the later projection shortcut; a 1x1 projection conv
        # gives the same result whether applied before or after this reshape
        # (which would not be true for anything but a 1x1 conv!)
        shortcut = StrideReshapeLayer(shortcut, n_stride=2)
    if increase_units_factor is not None:
        if projection:
            # projection shortcut, as option B in paper
            shortcut = batch_norm(Conv2DLayer(shortcut,
                                              num_filters=out_num_filters,
                                              filter_size=(1, 1),
                                              stride=(1, 1),
                                              nonlinearity=None,
                                              pad='same',
                                              b=None),
                                  epsilon=batch_norm_epsilon,
                                  alpha=batch_norm_alpha)
        else:
            # identity shortcut, as option A in paper
            n_extra_chans = out_num_filters - input_num_filters
            shortcut = PadLayer(shortcut, [n_extra_chans // 2, 0, 0],
                                batch_ndim=1)
    if add_after_nonlin:
        stack_2 = NonlinearityLayer(stack_2)
        block = ElemwiseSumLayer([stack_2, shortcut])
    else:
        block = NonlinearityLayer(ElemwiseSumLayer([stack_2, shortcut]),
                                  nonlinearity=nonlinearity)
    if survival_prob != 1:
        # Hack to make both broadcastable along the empty third dim.
        # Otherwise there is an error that they are of different types:
        # shortcut: TensorType(False,False,False,True)
        # block: TensorType4d(32) or similar
        shortcut = ExpressionLayer(shortcut, lambda x: T.addbroadcast(x, 3))
        block = ExpressionLayer(block, lambda x: T.addbroadcast(x, 3))
        block = RandomSwitchLayer(block, shortcut, survival_prob)
    return block
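A stripped-down sketch of the broadcast trick used at the end of residual_block above; the input shape is invented, and only ExpressionLayer and T.addbroadcast come from the code itself.

import theano.tensor as T
from lasagne.layers import InputLayer, ExpressionLayer

l = InputLayer((None, 16, 100, 1))
# mark the singleton last axis as broadcastable so the output can be combined
# element-wise with tensors that already broadcast along that axis
l_bc = ExpressionLayer(l, lambda x: T.addbroadcast(x, 3))
print(l_bc.output_shape)     # (None, 16, 100, 1) -- the shape is unchanged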
Example #5
def build_nmt_encoder_decoder(dim_word=1,
                              n_embd=100,
                              n_units=500,
                              n_proj=200,
                              state=None,
                              rev_state=None,
                              context_type=None,
                              attention=False,
                              drop_p=None):
    enc = OrderedDict()
    enc['input'] = InputLayer((None, None), name='input')
    enc_mask = enc['mask'] = InputLayer((None, None), name='mask')
    enc_rev_mask = enc['rev_mask'] = InputLayer((None, None), name='rev_mask')

    enc['input_emb'] = EmbeddingLayer(enc.values()[-1],
                                      input_size=dim_word,
                                      output_size=n_embd,
                                      name='input_emb')

    ### ENCODER PART ###
    # rnn encoder unit
    hid_init = Constant(0.)
    hid_init_rev = Constant(0.)
    encoder_unit = get_rnn_unit(enc.values()[-1],
                                enc_mask,
                                enc_rev_mask,
                                hid_init,
                                hid_init_rev,
                                n_units,
                                prefix='encoder_')
    enc.update(encoder_unit)

    # context layer = decoder's initial state of shape (batch_size, num_units)
    context = enc.values()[-1]  # net['context']
    if context_type == 'last':
        enc['context2init'] = SliceLayer(context,
                                         indices=-1,
                                         axis=1,
                                         name='last_encoder_context')
    elif context_type == 'mean':
        enc['context2init'] = ExpressionLayer(context,
                                              mean_over_1_axis,
                                              output_shape='auto',
                                              name='mean_encoder_context')

    ### DECODER PART ###
    W_init2proj, b_init2proj = GlorotUniform(), Constant(0.)

    enc['init_state'] = DenseLayer(enc['context2init'],
                                   num_units=n_units,
                                   W=W_init2proj,
                                   b=b_init2proj,
                                   nonlinearity=tanh,
                                   name='decoder_init_state')
    if state is None:
        init_state = enc['init_state']
        init_state_rev = None  #if rev_state is None else init_state
        if not attention:
            # if simple attention the context is 2D, else 3D
            context = enc['context2init']
    else:
        init_state = state
        init_state_rev = rev_state
        context = enc['context_input'] = \
            InputLayer((None, n_units), name='ctx_input')
    # (batch_size, nfeats)

    # (batch_size, valid ntsteps)
    enc['target'] = InputLayer((None, None), name='target')
    dec_mask = enc['target_mask'] = InputLayer((None, None),
                                               name='target_mask')

    enc['target_emb'] = EmbeddingLayer(enc.values()[-1],
                                       input_size=dim_word,
                                       output_size=n_embd,
                                       name='target_emb')
    prevdim = n_embd
    prev2rnn = enc.values()[-1]  # it's either emb or prev2rnn/noise

    decoder_unit = get_rnn_unit(prev2rnn,
                                dec_mask,
                                None,
                                init_state,
                                None,
                                n_units,
                                prefix='decoder_',
                                context=context,
                                attention=attention)
    enc.update(decoder_unit)

    if attention:
        ctxs = enc.values()[-1]
        ctxs_shape = ctxs.output_shape

        def get_ctx(x):
            return ctxs.ctx

        context = enc['context'] = ExpressionLayer(ctxs,
                                                   function=get_ctx,
                                                   output_shape=ctxs_shape,
                                                   name='context')

    # reshape for the feed-forward projection layers:
    # 2D shapes of (batch_size * num_steps, num_units/num_feats)
    enc['rnn2proj'] = rnn2proj = ReshapeLayer(enc.values()[-1], (-1, n_units),
                                              name='flatten_rnn2proj')

    enc['prev2proj'] = prev2proj = ReshapeLayer(prev2rnn, (-1, prevdim),
                                                name='flatten_prev')

    if isinstance(context, ExpressionLayer):
        ctx2proj = enc['ctx2proj'] = ReshapeLayer(context,
                                                  (-1, ctxs_shape[-1]),
                                                  name='flatten_ctxs')
    else:
        ctx2proj = context

    # load shared parameters
    W_rnn2proj, b_rnn2proj = GlorotUniform(), Constant(0.)
    W_prev2proj, b_prev2proj = GlorotUniform(), Constant(0.)
    W_ctx2proj, b_ctx2proj = GlorotUniform(), Constant(0.)

    # perturb rnn-to-projection by noise
    if drop_p is not None:
        rnn2proj = enc['noise_rnn2proj'] = DropoutLayer(rnn2proj,
                                                        p=drop_p,
                                                        name='noise_rnn2proj')

        prev2proj = enc['drop_prev2proj'] = DropoutLayer(prev2proj,
                                                         p=drop_p,
                                                         name='drop_prev2proj')

        ctx2proj = enc['noise_ctx2proj'] = DropoutLayer(ctx2proj,
                                                        p=drop_p,
                                                        name='noise_ctx2proj')

    # project rnn
    enc['rnn_proj'] = DenseLayer(rnn2proj,
                                 num_units=n_proj,
                                 nonlinearity=linear,
                                 W=W_rnn2proj,
                                 b=b_rnn2proj,
                                 name='rnn_proj')

    # project raw targets
    enc['prev_proj'] = DenseLayer(prev2proj,
                                  num_units=n_proj,
                                  nonlinearity=linear,
                                  W=W_prev2proj,
                                  b=b_prev2proj,
                                  name='prev_proj')

    # project context
    enc['ctx_proj'] = DenseLayer(ctx2proj,
                                 num_units=n_proj,
                                 nonlinearity=linear,
                                 W=W_ctx2proj,
                                 b=b_ctx2proj,
                                 name='ctx_proj')

    # reshape back for merging
    n_batch = enc['input'].input_var.shape[0]
    rnn2merge = enc['rnn2merge'] = ReshapeLayer(enc['rnn_proj'],
                                                (n_batch, -1, n_proj),
                                                name='reshaped_rnn2proj')

    prev2merge = enc['prev2merge'] = ReshapeLayer(enc['prev_proj'],
                                                  (n_batch, -1, n_proj),
                                                  name='reshaped_prev')

    if isinstance(context, ExpressionLayer):
        ctx2merge = ReshapeLayer(enc['ctx_proj'], (n_batch, -1, n_proj),
                                 name='reshaped_ctxs')
    else:
        ctx2merge = enc['ctx2merge'] = DimshuffleLayer(enc['ctx_proj'],
                                                       pattern=(0, 'x', 1),
                                                       name='reshaped_context')

    # combine projections into shape (batch_size, n_steps, n_proj)
    enc['proj_merge'] = ElemwiseMergeLayer([rnn2merge, prev2merge, ctx2merge],
                                           merge_function=tanh_add,
                                           name='proj_merge')

    # reshape for output regression projection
    enc['merge2proj'] = ReshapeLayer(enc.values()[-1], (-1, n_proj),
                                     name='flatten_proj_merge')

    # perturb concatenated regressors by noise
    if drop_p is not None:
        # if noise_type == 'binary':
        enc['noise_output'] = DropoutLayer(enc.values()[-1],
                                           p=drop_p,
                                           name='noise_output')

    # regress on combined (perturbed) projections
    out = get_output_unit(enc['target'], enc.values()[-1], dim_word)
    enc.update(out)  # update graph

    return enc
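The helper mean_over_1_axis passed to ExpressionLayer above is not defined in this snippet; a plausible definition consistent with output_shape='auto' would be the following (an assumption, not the original code):

def mean_over_1_axis(x):
    # assumed helper: average encoder states over the time axis, turning
    # (batch_size, n_steps, num_units) into (batch_size, num_units)
    return x.mean(axis=1)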