Example 1
from neon.initializers import Uniform
from neon.layers import Activation, Affine, Bias, Linear
from neon.transforms import Rectlin


def test_affine_wrapper(backend_default):
    """
    Verify that the Affine wrapper constructs the right layer objects.
    """
    nout = 11
    aff = Affine(nout, Uniform())
    assert isinstance(aff, list)
    assert len(aff) == 1
    assert isinstance(aff[0], Linear)
    assert aff[0].nout == nout

    aff = Affine(nout, Uniform(), bias=Uniform())
    assert isinstance(aff, list)
    assert len(aff) == 2
    assert isinstance(aff[0], Linear)
    assert isinstance(aff[1], Bias)

    aff = Affine(nout, Uniform(), activation=Rectlin())
    assert isinstance(aff, list)
    assert len(aff) == 2
    assert isinstance(aff[0], Linear)
    assert isinstance(aff[1], Activation)

    aff = Affine(nout, Uniform(), bias=Uniform(), activation=Rectlin())
    assert isinstance(aff, list)
    assert len(aff) == 3
    assert isinstance(aff[0], Linear)
    assert isinstance(aff[1], Bias)
    assert isinstance(aff[2], Activation)
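
Because Affine expands into a plain list of layers, the wrapper can be placed directly in a Model layer list. The following is a minimal sketch, assuming the standard neon imports and a generated backend; the layer sizes are illustrative only.

from neon.backends import gen_backend
from neon.initializers import Uniform
from neon.layers import Affine
from neon.models import Model
from neon.transforms import Rectlin, Softmax

be = gen_backend(backend='cpu', batch_size=128)

# each Affine below expands to Linear (+ Bias) (+ Activation), as the test asserts
mlp = Model(layers=[
    Affine(nout=100, init=Uniform(low=-0.1, high=0.1), bias=Uniform(),
           activation=Rectlin()),
    Affine(nout=10, init=Uniform(low=-0.1, high=0.1), activation=Softmax())
])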
Example 2
    def __init__(self,
                 sentence_length,
                 token_vocab_size,
                 pos_vocab_size=None,
                 char_vocab_size=None,
                 max_char_word_length=20,
                 token_embedding_size=None,
                 pos_embedding_size=None,
                 char_embedding_size=None,
                 num_labels=None,
                 lstm_hidden_size=100,
                 num_lstm_layers=1,
                 use_external_embedding=None,
                 dropout=0.5):

        init = GlorotUniform()
        # token input stream: a learned lookup table, or raw external embeddings
        tokens = []
        if use_external_embedding is None:
            tokens.append(
                LookupTable(vocab_size=token_vocab_size,
                            embedding_dim=token_embedding_size,
                            init=init,
                            pad_idx=0))
        else:
            tokens.append(DataInput())
        tokens.append(Reshape((-1, sentence_length)))
        f_layers = [tokens]

        # add POS tag input
        if pos_vocab_size is not None and pos_embedding_size is not None:
            f_layers.append([
                LookupTable(vocab_size=pos_vocab_size,
                            embedding_dim=pos_embedding_size,
                            init=init,
                            pad_idx=0),
                Reshape((-1, sentence_length))
            ])

        # add Character RNN input
        if char_vocab_size is not None and char_embedding_size is not None:
            char_lut_layer = LookupTable(vocab_size=char_vocab_size,
                                         embedding_dim=char_embedding_size,
                                         init=init,
                                         pad_idx=0)
            char_nn = [
                char_lut_layer,
                TimeDistBiLSTM(char_embedding_size,
                               init,
                               activation=Logistic(),
                               gate_activation=Tanh(),
                               reset_cells=True,
                               reset_freq=max_char_word_length),
                TimeDistributedRecurrentLast(timesteps=max_char_word_length),
                Reshape((-1, sentence_length))
            ]

            f_layers.append(char_nn)

        layers = []
        if len(f_layers) == 1:
            # single input stream: use the token lookup (or data input) layer directly
            layers.append(f_layers[0][0])
        else:
            # multiple input streams: stack them before the recurrent encoder
            layers.append(MergeMultistream(layers=f_layers, merge="stack"))
            layers.append(Reshape((-1, sentence_length)))
        # deep bidirectional LSTM encoder, dropout, and a per-token softmax classifier
        layers += [
            DeepBiLSTM(lstm_hidden_size,
                       init,
                       activation=Logistic(),
                       gate_activation=Tanh(),
                       reset_cells=True,
                       depth=num_lstm_layers),
            Dropout(keep=dropout),
            Affine(num_labels, init, bias=init, activation=Softmax())
        ]
        self._model = Model(layers=layers)
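
The constructor above only assembles the layer graph into self._model; callers instantiate the enclosing class with the vocabulary sizes and label count for their dataset. A minimal sketch follows; the class name SequenceTaggerModel is hypothetical (the class definition is not shown here) and all sizes are placeholders.

# hypothetical instantiation; class name and sizes are placeholders
tagger = SequenceTaggerModel(sentence_length=50,
                             token_vocab_size=20000,
                             token_embedding_size=100,
                             pos_vocab_size=50,
                             pos_embedding_size=25,
                             char_vocab_size=80,
                             char_embedding_size=25,
                             num_labels=17,
                             lstm_hidden_size=100,
                             num_lstm_layers=2,
                             dropout=0.5)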
Example 3
def num_filters_handler(network, flags, stacks, this_model):
    """
    Append a dense or convolutional layer to ``network`` according to the
    per-layer ``flags`` (num_filters, filter_size, stride, pad, init, ...),
    recording any weight-sharing group in ``this_model``.

    Returns the extended network and the list of parameterized layers added.
    """
    paramlayers = []
    if 'sharegroup2params' not in this_model:
        this_model['sharegroup2params'] = {}
    sharegroup2params = this_model['sharegroup2params']

    if 'layer2sharegroup' not in this_model:
        this_model['layer2sharegroup'] = {}
    layer2sharegroup = this_model['layer2sharegroup']
    if 'constlayer2sharegroup' not in this_model:
        this_model['constlayer2sharegroup'] = {}
    constlayer2sharegroup = this_model['constlayer2sharegroup']

    num_filters = flags['num_filters']
    conv_stride = flags.get('stride', 0)
    layername = flags.get('layername', None)
    filter_size = flags.get('filter_size', 0)
    bn = flags.get('bn', False)

    # default to "same" padding; non-overlapping windows (stride == filter size)
    # need no extra padding
    if conv_stride == 0 or conv_stride == 1:
        pad = filter_size//2
    elif conv_stride > 0:
        if filter_size == conv_stride:
            pad = 0
        else:
            pad = filter_size//2
    else:  # conv_stride < 0 encodes a dilated convolution (dilation = -conv_stride)
        num_filters = num_filters*(-conv_stride)*(-conv_stride)
        if 'nopad' not in flags:
            pad = filter_size//2
        else:
            pad = 0
    if 'pad' in flags:
        pad = flags['pad']
    nonlinearity = None
    if 'linear' in flags:
        pass
    elif 'nonlinearity' in flags:
        nonlinearity = flags['nonlinearity']
    else:
        nonlinearity = this_model.get('relu', neon.transforms.Rectlin())

    sharegroup = flags.get('sharegroup', 0)

    # if sharegroup and sharegroup in sharegroup2params:
    #    paramlayer = None  # sharegroup2params[sharegroup]
    # else:
    #    paramlayer = None
    init = this_model.get('init', GlorotUniform())
    if 'init' in flags:
        init = flags['init']
    if 'init_gain' in flags:
        init = GlorotUniform(gain=flags['init_gain'])
    else:
        if isinstance(nonlinearity, neon.transforms.Rectlin) and nonlinearity.slope > 0:
            # leaky rectifier: He-style gain sqrt(2 / (1 + alpha**2))
            alpha = nonlinearity.slope
            init = GlorotUniform(gain=math.sqrt(2/(1+alpha**2)))
        elif isinstance(nonlinearity, neon.transforms.Rectlin):
            init = GlorotUniform(gain='relu')
        else:
            pass
    if 'nobias' in flags:
        bias = None
    else:
        bias = neon.initializers.Constant(0.0)

    # utils.walk(network)
    # number of spatial dimensions: output shape minus batch and channel axes
    dim = len(get_output_shape(network))-2

    if 'dense' in flags or dim <= 1:
        # fully connected path: wrap an Affine compound layer
        paramlayer = sequential(layers=Affine(
                nout=num_filters,
                init=init,
                bias=bias,
                batch_norm=bn,
                activation=nonlinearity))
        if sharegroup:
            if 'const' in flags:
                constlayer2sharegroup[paramlayer] = sharegroup
            else:
                layer2sharegroup[paramlayer] = sharegroup
        network = sequential(layers=(
            network,
            paramlayer,
            ))
    else:
        # input_shape = lasagne.layers.get_output_shape(network)
        if 'local' not in flags:
            assert filter_size > 0
            paramlayer = sequential(layers=Conv(
                    fshape=(filter_size,)*dim+(num_filters,),
                    init=init,
                    bias=bias,
                    strides=max(1, conv_stride),
                    padding=pad,
                    activation=nonlinearity,
                    name=layername,
                    batch_norm=bn,
                    # a negative stride is reused to request dilation
                    dilation=-conv_stride if conv_stride < 0 else {}
                    ))
            if sharegroup:
                if 'const' in flags:
                    constlayer2sharegroup[paramlayer] = sharegroup
                else:
                    layer2sharegroup[paramlayer] = sharegroup
            network = sequential(layers=(
                network,
                paramlayer,
                ))
        else:  # local
            raise NotImplementedError
    paramlayers += [paramlayer]
    if sharegroup and sharegroup not in sharegroup2params:
        sharegroup2params[sharegroup] = ['W', 'b']
    if 'saveparamlayer' in flags and flags['saveparamlayer'] is not None:
        g = flags['saveparamlayer']
        if g not in stacks:
            stacks[g] = []
        stacks[g] += [paramlayer]
    return network, paramlayers
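
The handler is driven entirely by the per-layer flags dictionary. The call below is a hypothetical sketch: it assumes network is an existing neon layer container and simply shows the shape of a request for a strided, batch-normalized 3x3 convolution with 64 filters.

from neon.initializers import GlorotUniform

# hypothetical per-layer specification; `network` is assumed to already exist
flags = {'num_filters': 64,   # output feature maps
         'filter_size': 3,    # 3x3 kernel
         'stride': 2,         # strided convolution
         'bn': True,          # add batch normalization
         'layername': 'conv1'}
stacks = {}
this_model = {'init': GlorotUniform()}
network, paramlayers = num_filters_handler(network, flags, stacks, this_model)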