def test_affine_wrapper(backend_default):
    """ Verify that the Affine wrapper constructs the right layer objects. """
    nout = 11
    aff = Affine(nout, Uniform())
    assert isinstance(aff, list)
    assert len(aff) == 1
    assert isinstance(aff[0], Linear)
    assert aff[0].nout == nout

    aff = Affine(nout, Uniform(), bias=Uniform())
    assert isinstance(aff, list)
    assert len(aff) == 2
    assert isinstance(aff[0], Linear)
    assert isinstance(aff[1], Bias)

    aff = Affine(nout, Uniform(), activation=Rectlin())
    assert isinstance(aff, list)
    assert len(aff) == 2
    assert isinstance(aff[0], Linear)
    assert isinstance(aff[1], Activation)

    aff = Affine(nout, Uniform(), bias=Uniform(), activation=Rectlin())
    assert isinstance(aff, list)
    assert len(aff) == 3
    assert isinstance(aff[0], Linear)
    assert isinstance(aff[1], Bias)
    assert isinstance(aff[2], Activation)
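
# Usage sketch (not part of the test above): the layer list returned by the
# Affine wrapper can be dropped straight into a Model, which flattens nested
# layer sequences. This assumes the standard neon APIs imported below and a
# generated CPU backend; the layer sizes are illustrative only.
def _affine_wrapper_usage_sketch():
    from neon.backends import gen_backend
    from neon.initializers import Uniform
    from neon.layers import Affine
    from neon.models import Model
    from neon.transforms import Rectlin, Softmax

    gen_backend(backend='cpu', batch_size=32)
    layers = [
        Affine(128, Uniform(-0.1, 0.1), bias=Uniform(-0.1, 0.1), activation=Rectlin()),
        Affine(10, Uniform(-0.1, 0.1), activation=Softmax()),
    ]
    return Model(layers=layers)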
def __init__(self, sentence_length, token_vocab_size, pos_vocab_size=None,
             char_vocab_size=None, max_char_word_length=20,
             token_embedding_size=None, pos_embedding_size=None,
             char_embedding_size=None, num_labels=None, lstm_hidden_size=100,
             num_lstm_layers=1, use_external_embedding=None, dropout=0.5):
    """
    Build the tagging network: a token input stream (embedding lookup or
    externally supplied vectors), plus optional POS-tag and character-BiLSTM
    streams, merged and fed through a deep bidirectional LSTM with dropout
    and a per-token softmax Affine classifier.
    """
    init = GlorotUniform()

    # token input stream
    tokens = []
    if use_external_embedding is None:
        tokens.append(LookupTable(vocab_size=token_vocab_size,
                                  embedding_dim=token_embedding_size,
                                  init=init,
                                  pad_idx=0))
    else:
        tokens.append(DataInput())
    tokens.append(Reshape((-1, sentence_length)))
    f_layers = [tokens]

    # add POS tag input
    if pos_vocab_size is not None and pos_embedding_size is not None:
        f_layers.append([
            LookupTable(vocab_size=pos_vocab_size,
                        embedding_dim=pos_embedding_size,
                        init=init,
                        pad_idx=0),
            Reshape((-1, sentence_length))
        ])

    # add character RNN input
    if char_vocab_size is not None and char_embedding_size is not None:
        char_lut_layer = LookupTable(vocab_size=char_vocab_size,
                                     embedding_dim=char_embedding_size,
                                     init=init,
                                     pad_idx=0)
        char_nn = [
            char_lut_layer,
            TimeDistBiLSTM(char_embedding_size, init,
                           activation=Logistic(),
                           gate_activation=Tanh(),
                           reset_cells=True,
                           reset_freq=max_char_word_length),
            TimeDistributedRecurrentLast(timesteps=max_char_word_length),
            Reshape((-1, sentence_length))
        ]
        f_layers.append(char_nn)

    layers = []
    if len(f_layers) == 1:
        layers.append(f_layers[0][0])
    else:
        layers.append(MergeMultistream(layers=f_layers, merge="stack"))
        layers.append(Reshape((-1, sentence_length)))

    layers += [
        DeepBiLSTM(lstm_hidden_size, init,
                   activation=Logistic(),
                   gate_activation=Tanh(),
                   reset_cells=True,
                   depth=num_lstm_layers),
        Dropout(keep=dropout),
        Affine(num_labels, init, bias=init, activation=Softmax())
    ]
    self._model = Model(layers=layers)
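
# Usage sketch: how this constructor might be called for a model with token,
# POS, and character inputs. "SequenceTagger" is a stand-in for whichever
# class owns the __init__ above (the class name is not shown here), and the
# vocabulary sizes, label count, and dimensions are illustrative only.
#
# tagger = SequenceTagger(sentence_length=50,
#                         token_vocab_size=20000,
#                         token_embedding_size=100,
#                         pos_vocab_size=50,
#                         pos_embedding_size=25,
#                         char_vocab_size=80,
#                         char_embedding_size=16,
#                         max_char_word_length=20,
#                         num_labels=23,
#                         lstm_hidden_size=100,
#                         num_lstm_layers=2,
#                         dropout=0.5)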
def num_filters_handler(network, flags, stacks, this_model):
    """
    Append a dense or convolutional block to ``network`` according to ``flags``,
    recording weight-sharing groups on ``this_model`` and optionally saving the
    new layer into ``stacks``. Returns the extended network and the list of
    parameter layers that were added.
    """
    paramlayers = []
    if 'sharegroup2params' not in this_model:
        this_model['sharegroup2params'] = {}
    sharegroup2params = this_model['sharegroup2params']
    if 'layer2sharegroup' not in this_model:
        this_model['layer2sharegroup'] = {}
    layer2sharegroup = this_model['layer2sharegroup']
    if 'constlayer2sharegroup' not in this_model:
        this_model['constlayer2sharegroup'] = {}
    constlayer2sharegroup = this_model['constlayer2sharegroup']

    num_filters = flags['num_filters']
    conv_stride = flags.get('stride', 0)
    layername = flags.get('layername', None)
    filter_size = flags.get('filter_size', 0)
    bn = flags.get('bn', False)

    # stride handling: a non-positive conv_stride keeps stride 1; a negative
    # value multiplies the filter count by stride**2 and its magnitude is
    # reused below as a dilation factor
    if conv_stride == 0 or conv_stride == 1:
        pad = filter_size // 2
    elif conv_stride > 0:
        pad = 0 if filter_size == conv_stride else filter_size // 2
    else:  # conv_stride < 0
        num_filters = num_filters * (-conv_stride) * (-conv_stride)
        pad = filter_size // 2 if 'nopad' not in flags else 0
    if 'pad' in flags:
        pad = flags['pad']

    nonlinearity = None
    if 'linear' in flags:
        pass
    elif 'nonlinearity' in flags:
        nonlinearity = flags['nonlinearity']
    else:
        nonlinearity = this_model.get('relu', neon.transforms.Rectlin())

    sharegroup = flags.get('sharegroup', 0)

    init = this_model.get('init', GlorotUniform())
    if 'init' in flags:
        init = flags['init']
    if 'init_gain' in flags:
        init = GlorotUniform(gain=flags['init_gain'])
    else:
        # pick a Glorot gain that matches a (possibly leaky) Rectlin activation
        if isinstance(nonlinearity, neon.transforms.Rectlin) and nonlinearity.slope > 0:
            alpha = nonlinearity.slope
            init = GlorotUniform(gain=math.sqrt(2 / (1 + alpha ** 2)))
        elif isinstance(nonlinearity, neon.transforms.Rectlin):
            init = GlorotUniform(gain='relu')

    bias = None if 'nobias' in flags else neon.initializers.Constant(0.0)

    dim = len(get_output_shape(network)) - 2
    if 'dense' in flags or dim <= 1:
        paramlayer = sequential(layers=Affine(nout=num_filters,
                                              init=init,
                                              bias=bias,
                                              batch_norm=bn,
                                              activation=nonlinearity))
        if sharegroup:
            if 'const' in flags:
                constlayer2sharegroup[paramlayer] = sharegroup
            else:
                layer2sharegroup[paramlayer] = sharegroup
        network = sequential(layers=(network, paramlayer))
    else:
        if 'local' not in flags:
            assert filter_size > 0
            paramlayer = sequential(layers=Conv(
                fshape=(filter_size,) * dim + (num_filters,),
                init=init,
                bias=bias,
                strides=max(1, conv_stride),
                padding=pad,
                activation=nonlinearity,
                name=layername,
                batch_norm=bn,
                dilation=-conv_stride if conv_stride < 0 else {}))
            if sharegroup:
                if 'const' in flags:
                    constlayer2sharegroup[paramlayer] = sharegroup
                else:
                    layer2sharegroup[paramlayer] = sharegroup
            network = sequential(layers=(network, paramlayer))
        else:  # locally connected layers are not supported
            raise NotImplementedError

    paramlayers += [paramlayer]
    if sharegroup and sharegroup not in sharegroup2params:
        sharegroup2params[sharegroup] = ['W', 'b']
    if 'saveparamlayer' in flags and flags['saveparamlayer'] is not None:
        g = flags['saveparamlayer']
        if g not in stacks:
            stacks[g] = []
        stacks[g] += [paramlayer]
    return network, paramlayers
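
# Usage sketch: the handler is driven entirely by a flags dictionary. The
# "network" argument is whatever layer container the surrounding framework
# threads through these handlers, so the calls below are left commented out
# rather than runnable on their own; the flag values are examples only.
#
# this_model = {'init': GlorotUniform(), 'relu': neon.transforms.Rectlin()}
# stacks = {}
#
# # 3x3 convolution with 64 filters, stride 1, batch norm:
# network, added = num_filters_handler(network,
#                                      {'num_filters': 64, 'filter_size': 3,
#                                       'stride': 1, 'bn': True},
#                                      stacks, this_model)
#
# # fully connected block with 128 outputs and no bias:
# network, added = num_filters_handler(network,
#                                      {'num_filters': 128, 'dense': True,
#                                       'nobias': True},
#                                      stacks, this_model)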