def get_embedding_layer(self, l_in, extra_vars):
        language = extra_vars[0]
        context_vars = extra_vars[1:]

        id_tag = (self.id + '/') if self.id else ''

        l_lang = InputLayer(shape=(None, ),
                            input_var=language,
                            name=id_tag + 'lang_input')

        if self.options.bilingual_en_embed_file:
            en_embeddings = load_embeddings(
                self.options.bilingual_en_embed_file, self.seq_vec)
            en_embed_size = en_embeddings.shape[1]
        else:
            en_embeddings = Normal()
            en_embed_size = self.options.bilingual_embed_size

        if self.options.bilingual_zh_embed_file:
            zh_embeddings = load_embeddings(
                self.options.bilingual_zh_embed_file, self.seq_vec)
            zh_embed_size = zh_embeddings.shape[1]
        else:
            zh_embeddings = Normal()
            zh_embed_size = self.options.bilingual_embed_size

        l_en = EmbeddingLayer(l_in,
                              input_size=len(self.seq_vec.tokens),
                              output_size=en_embed_size,
                              W=en_embeddings,
                              name=id_tag + 'desc_embed_en')
        l_en_transformed = dimshuffle(l_en, (0, 2, 1))
        l_en_transformed = NINLayer(l_en_transformed,
                                    num_units=self.options.listener_cell_size,
                                    nonlinearity=None,
                                    name=id_tag + 'desc_embed_en_transformed')
        l_en_transformed = dimshuffle(l_en_transformed, (0, 2, 1))

        l_zh = EmbeddingLayer(l_in,
                              input_size=len(self.seq_vec.tokens),
                              output_size=zh_embed_size,
                              W=zh_embeddings,
                              name=id_tag + 'desc_embed_zh')
        l_zh_transformed = dimshuffle(l_zh, (0, 2, 1))
        l_zh_transformed = NINLayer(l_zh_transformed,
                                    num_units=self.options.listener_cell_size,
                                    nonlinearity=None,
                                    name=id_tag + 'desc_embed_zh_transformed')
        l_zh_transformed = dimshuffle(l_zh_transformed, (0, 2, 1))
        l_merged = SwitchLayer(l_lang, [l_en_transformed, l_zh_transformed],
                               name=id_tag + 'desc_embed_switch')
        return (l_merged, context_vars)
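All of these examples lean on the same property of lasagne.layers.NINLayer: it applies one shared dense projection along axis 1 and broadcasts over any trailing axes, which is why the snippet above dimshuffles to (batch, embed, seq) before the NINLayer and back to (batch, seq, units) afterwards. A minimal stand-alone shape check (shapes chosen for illustration, not taken from the example):

from lasagne.layers import InputLayer, NINLayer, get_output_shape

l_demo = InputLayer(shape=(None, 16, 20))              # (batch, features=16, seq_len=20)
l_proj = NINLayer(l_demo, num_units=32, nonlinearity=None)
print(get_output_shape(l_proj))                        # (None, 32, 20)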
Example #2
def D_paper(
    num_channels    = 1,        # Overridden based on dataset.
    resolution      = 32,       # Overridden based on dataset.
    label_size      = 0,        # Overridden based on dataset.
    fmap_base       = 4096,
    fmap_decay      = 1.0,
    fmap_max        = 256,
    mbstat_func     = 'Tstdeps',
    mbstat_avg      = 'all',
    mbdisc_kernels  = None,
    use_wscale      = True,
    use_gdrop       = True,
    use_layernorm   = False,
    **kwargs):

    R = int(np.log2(resolution))
    assert resolution == 2**R and resolution >= 4
    cur_lod = theano.shared(np.float32(0.0))
    gdrop_strength = theano.shared(np.float32(0.0))
    def nf(stage): return min(int(fmap_base / (2.0 ** (stage * fmap_decay))), fmap_max)
    def GD(layer): return GDropLayer(layer, name=layer.name+'gd', mode='prop', strength=gdrop_strength) if use_gdrop else layer
    def LN(layer): return LayerNormLayer(layer, name=layer.name+'ln') if use_layernorm else layer
    def WS(layer): return WScaleLayer(layer, name=layer.name+'ws') if use_wscale else layer

    input_layer = InputLayer(name='Dimages', shape=[None, num_channels, 2**R, 2**R])
    net = WS(NINLayer(input_layer, name='D%dx' % (R-1), num_units=nf(R-1), nonlinearity=lrelu, W=ilrelu))

    for I in xrange(R-1, 1, -1): # I = R-1, R-2, ..., 2
        net = LN(WS(Conv2DLayer     (GD(net),     name='D%db'   % I, num_filters=nf(I),   filter_size=3, pad=1, nonlinearity=lrelu, W=ilrelu)))
        net = LN(WS(Conv2DLayer     (GD(net),     name='D%da'   % I, num_filters=nf(I-1), filter_size=3, pad=1, nonlinearity=lrelu, W=ilrelu)))
        net =       Downscale2DLayer(net,         name='D%ddn'  % I, scale_factor=2)
        lod =       Downscale2DLayer(input_layer, name='D%dxs'  % (I-1), scale_factor=2**(R-I))
        lod =    WS(NINLayer        (lod,         name='D%dx'   % (I-1), num_units=nf(I-1), nonlinearity=lrelu, W=ilrelu))
        net =       LODSelectLayer  (             name='D%dlod' % (I-1), incomings=[net, lod], cur_lod=cur_lod, first_incoming_lod=R-I-1)

    if mbstat_avg is not None:
        net = MinibatchStatConcatLayer(net, name='Dstat', func=globals()[mbstat_func], averaging=mbstat_avg)

    net = LN(WS(Conv2DLayer(GD(net), name='D1b', num_filters=nf(1), filter_size=3, pad=1, nonlinearity=lrelu, W=ilrelu)))
    net = LN(WS(Conv2DLayer(GD(net), name='D1a', num_filters=nf(0), filter_size=4, pad=0, nonlinearity=lrelu, W=ilrelu)))

    if mbdisc_kernels:
        import minibatch_discrimination
        net = minibatch_discrimination.MinibatchLayer(net, name='Dmd', num_kernels=mbdisc_kernels)

    output_layers = [WS(DenseLayer(net, name='Dscores', num_units=1, nonlinearity=linear, W=ilinear))]
    if label_size:
        output_layers += [WS(DenseLayer(net, name='Dlabels', num_units=label_size, nonlinearity=linear, W=ilinear))]
    return dict(input_layers=[input_layer], output_layers=output_layers, cur_lod=cur_lod, gdrop_strength=gdrop_strength)
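For reference, the nf() helper above decays the feature-map count per stage and caps it at fmap_max; a quick stand-alone evaluation with the default arguments (plain Python, no Theano needed):

fmap_base, fmap_decay, fmap_max = 4096, 1.0, 256
def nf(stage): return min(int(fmap_base / (2.0 ** (stage * fmap_decay))), fmap_max)
print([nf(s) for s in range(9)])   # [256, 256, 256, 256, 256, 128, 64, 32, 16]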
Example #3
def build_network():
    conv_defs = {
        'W': lasagne.init.HeNormal('relu'),
        'b': lasagne.init.Constant(0.0),
        'filter_size': (3, 3),
        'stride': (1, 1),
        'nonlinearity': lasagne.nonlinearities.LeakyRectify(0.1)
    }

    nin_defs = {
        'W': lasagne.init.HeNormal('relu'),
        'b': lasagne.init.Constant(0.0),
        'nonlinearity': lasagne.nonlinearities.LeakyRectify(0.1)
    }

    dense_defs = {
        'W': lasagne.init.HeNormal(1.0),
        'b': lasagne.init.Constant(0.0),
        'nonlinearity': lasagne.nonlinearities.softmax
    }

    wn_defs = {
        'momentum': .999
    }

    net = InputLayer        (     name='input',    shape=(None, 3, 32, 32))
    net = GaussianNoiseLayer(net, name='noise',    sigma=.15)
    net = WN(Conv2DLayer    (net, name='conv1a',   num_filters=128, pad='same', **conv_defs), **wn_defs)
    net = WN(Conv2DLayer    (net, name='conv1b',   num_filters=128, pad='same', **conv_defs), **wn_defs)
    net = WN(Conv2DLayer    (net, name='conv1c',   num_filters=128, pad='same', **conv_defs), **wn_defs)
    net = MaxPool2DLayer    (net, name='pool1',    pool_size=(2, 2))
    net = DropoutLayer      (net, name='drop1',    p=.5)
    net = WN(Conv2DLayer    (net, name='conv2a',   num_filters=256, pad='same', **conv_defs), **wn_defs)
    net = WN(Conv2DLayer    (net, name='conv2b',   num_filters=256, pad='same', **conv_defs), **wn_defs)
    net = WN(Conv2DLayer    (net, name='conv2c',   num_filters=256, pad='same', **conv_defs), **wn_defs)
    net = MaxPool2DLayer    (net, name='pool2',    pool_size=(2, 2))
    net = DropoutLayer      (net, name='drop2',    p=.5)
    net = WN(Conv2DLayer    (net, name='conv3a',   num_filters=512, pad=0,      **conv_defs), **wn_defs)
    net = WN(NINLayer       (net, name='conv3b',   num_units=256,               **nin_defs),  **wn_defs)
    net = WN(NINLayer       (net, name='conv3c',   num_units=128,               **nin_defs),  **wn_defs)
    net = GlobalPoolLayer   (net, name='pool3')
    net = WN(DenseLayer     (net, name='dense',    num_units=10,       **dense_defs), **wn_defs)

    return net
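A hedged usage sketch for the CIFAR-style network above, assuming build_network and its helpers (WN and the layer imports) are importable from the surrounding module; this is illustrative, not part of the original project:

import theano
import theano.tensor as T
import lasagne

net = build_network()
x = T.tensor4('x')
# deterministic=True disables the GaussianNoiseLayer and DropoutLayers at test time
probs = lasagne.layers.get_output(net, x, deterministic=True)
predict_fn = theano.function([x], probs)
print(lasagne.layers.count_params(net, trainable=True))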
Example #4
def build_network(input_var, num_input_channels, num_classes):
    conv_defs = {
        'W': lasagne.init.HeNormal('relu'),
        'b': lasagne.init.Constant(0.0),
        'filter_size': (3, 3),
        'stride': (1, 1),
        'nonlinearity': lasagne.nonlinearities.LeakyRectify(0.1)
    }

    nin_defs = {
        'W': lasagne.init.HeNormal('relu'),
        'b': lasagne.init.Constant(0.0),
        'nonlinearity': lasagne.nonlinearities.LeakyRectify(0.1)
    }

    dense_defs = {
        'W': lasagne.init.HeNormal(1.0),
        'b': lasagne.init.Constant(0.0),
        'nonlinearity': lasagne.nonlinearities.softmax
    }

    wn_defs = {
        'momentum': config.batch_normalization_momentum
    }

    net = InputLayer        (     name='input',    shape=(None, num_input_channels, 28, 28), input_var=input_var)
    net = GaussianNoiseLayer(net, name='noise',    sigma=config.augment_noise_stddev)
    net = WN(Conv2DLayer    (net, name='conv1a',   num_filters=32, pad='same', **conv_defs), **wn_defs)
    net = WN(Conv2DLayer    (net, name='conv1b',   num_filters=64, pad='same', **conv_defs), **wn_defs)
#    net = WN(Conv2DLayer    (net, name='conv1c',   num_filters=128, pad='same', **conv_defs), **wn_defs)
    net = MaxPool2DLayer    (net, name='pool1',    pool_size=(2, 2))
    net = DropoutLayer      (net, name='drop1',    p=.5)
    net = WN(Conv2DLayer    (net, name='conv2a',   num_filters=32, pad='same', **conv_defs), **wn_defs)
    net = WN(Conv2DLayer    (net, name='conv2b',   num_filters=64, pad='same', **conv_defs), **wn_defs)
#    net = WN(Conv2DLayer    (net, name='conv2c',   num_filters=256, pad='same', **conv_defs), **wn_defs)
    net = MaxPool2DLayer    (net, name='pool2',    pool_size=(2, 2))
    net = DropoutLayer      (net, name='drop2',    p=.5)
    net = WN(Conv2DLayer    (net, name='conv3a',   num_filters=32, pad=0,      **conv_defs), **wn_defs)
#    net = WN(NINLayer       (net, name='conv3b',   num_units=256,               **nin_defs),  **wn_defs)
    net = WN(NINLayer       (net, name='conv3c',   num_units=256,               **nin_defs),  **wn_defs)
    net = GlobalPoolLayer   (net, name='pool3')    
    net = WN(DenseLayer     (net, name='dense',    num_units=num_classes,       **dense_defs), **wn_defs)
    
    
#    net = GaussianNoiseLayer(net, name='noise',    sigma=config.augment_noise_stddev)
#    net = WN(DenseLayer     (net, name='dense1',    num_units=256,       **dense_defs), **wn_defs)
#    net = DropoutLayer      (net, name='drop1',    p=.5)
#    net = WN(DenseLayer     (net, name='dense2',    num_units=256,       **dense_defs), **wn_defs)
#    net = DropoutLayer      (net, name='drop2',    p=.5)
#    net = WN(DenseLayer     (net, name='dense3',    num_units=256,       **dense_defs), **wn_defs)
#     
#    net = WN(DenseLayer     (net, name='dense4',    num_units=num_classes,       **dense_defs), **wn_defs)


    return net
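And a hedged training sketch for the 28x28 variant above, assuming its module-level imports and the config object are available; the optimizer, learning rate, channel count and class count here are illustrative choices, not taken from the original project:

import theano
import theano.tensor as T
import lasagne

input_var = T.tensor4('inputs')
target_var = T.ivector('targets')
net = build_network(input_var, num_input_channels=1, num_classes=10)
probs = lasagne.layers.get_output(net)
loss = lasagne.objectives.categorical_crossentropy(probs, target_var).mean()
params = lasagne.layers.get_all_params(net, trainable=True)
updates = lasagne.updates.adam(loss, params, learning_rate=3e-4)
train_fn = theano.function([input_var, target_var], loss, updates=updates)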
Example #5
    def nin_layer(cls,
                  cur_layer,
                  name,
                  num_units,
                  dilation=1,
                  nonlinearity=lasagne.nonlinearities.rectify):
        cur_layer = NINLayer(cur_layer,
                             num_units=num_units,
                             nonlinearity=nonlinearity,
                             name=name)

        return cur_layer, dilation
Example #6
def G_paper(
    num_channels        = 1,        # Overridden based on dataset.
    resolution          = 32,       # Overridden based on dataset.
    label_size          = 0,        # Overridden based on dataset.
    fmap_base           = 4096,
    fmap_decay          = 1.0,
    fmap_max            = 256,
    latent_size         = None,
    normalize_latents   = True,
    use_wscale          = True,
    use_pixelnorm       = True,
    use_leakyrelu       = True,
    use_batchnorm       = False,
    tanh_at_end         = None,
    **kwargs):

    R = int(np.log2(resolution))
    assert resolution == 2**R and resolution >= 4
    cur_lod = theano.shared(np.float32(0.0))
    def nf(stage): return min(int(fmap_base / (2.0 ** (stage * fmap_decay))), fmap_max)
    def PN(layer): return PixelNormLayer(layer, name=layer.name+'pn') if use_pixelnorm else layer
    def BN(layer): return lasagne.layers.batch_norm(layer) if use_batchnorm else layer
    def WS(layer): return WScaleLayer(layer, name=layer.name+'S') if use_wscale else layer
    if latent_size is None: latent_size = nf(0)
    (act, iact) = (lrelu, ilrelu) if use_leakyrelu else (relu, irelu)

    input_layers = [InputLayer(name='Glatents', shape=[None, latent_size])]
    net = input_layers[-1]
    if normalize_latents:
        net = PixelNormLayer(net, name='Glnorm')
    if label_size:
        input_layers += [InputLayer(name='Glabels', shape=[None, label_size])]
        net = ConcatLayer(name='Gina', incomings=[net, input_layers[-1]])

    net = ReshapeLayer(name='Ginb', incoming=net, shape=[[0], [1], 1, 1])
    net = PN(BN(WS(Conv2DLayer(net, name='G1a', num_filters=nf(1), filter_size=4, pad='full', nonlinearity=act, W=iact))))
    net = PN(BN(WS(Conv2DLayer(net, name='G1b', num_filters=nf(1), filter_size=3, pad=1,      nonlinearity=act, W=iact))))
    lods  = [net]

    for I in xrange(2, R): # I = 2, 3, ..., R-1
        net = Upscale2DLayer(net, name='G%dup' % I, scale_factor=2)
        net = PN(BN(WS(Conv2DLayer(net, name='G%da'  % I, num_filters=nf(I), filter_size=3, pad=1, nonlinearity=act, W=iact))))
        net = PN(BN(WS(Conv2DLayer(net, name='G%db'  % I, num_filters=nf(I), filter_size=3, pad=1, nonlinearity=act, W=iact))))
        lods += [net]

    lods = [WS(NINLayer(l, name='Glod%d' % i, num_units=num_channels, nonlinearity=linear, W=ilinear)) for i, l in enumerate(reversed(lods))]
    output_layer = LODSelectLayer(name='Glod', incomings=lods, cur_lod=cur_lod, first_incoming_lod=0)
    if tanh_at_end is not None:
        output_layer = NonlinearityLayer(output_layer, name='Gtanh', nonlinearity=tanh)
        if tanh_at_end != 1.0:
            output_layer = non_trainable(ScaleLayer(output_layer, name='Gtanhs', scales=lasagne.init.Constant(tanh_at_end)))
    return dict(input_layers=input_layers, output_layers=[output_layer], cur_lod=cur_lod)
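As a sanity check on the progressive structure above: lods collects one output per resolution, from the initial 4x4 block upward, and is then reversed so that first_incoming_lod=0 corresponds to the finest output. For the default resolution=32 (R=5):

R = 5
lod_resolutions = list(reversed([2 ** i for i in range(2, R + 1)]))
print(lod_resolutions)   # [32, 16, 8, 4]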
Example #7
    def _get_l_out(self, input_vars):
        id_tag = (self.id + '/') if self.id else ''

        cell_size = self.options.speaker_cell_size or self.seq_vec.num_types
        l_color_repr, color_inputs = self.color_vec.get_input_layer(
            input_vars, recurrent_length=0, cell_size=cell_size, id=self.id)
        l_hidden_color = l_color_repr
        for i in range(1, self.options.speaker_hidden_color_layers + 1):
            l_hidden_color = NINLayer(
                l_hidden_color,
                num_units=cell_size,
                nonlinearity=NONLINEARITIES[self.options.speaker_nonlinearity],
                name=id_tag + 'hidden_color%d' % i)

        if self.options.speaker_cell_size == 0:
            l_scores = l_color_repr  # BiasLayer(l_color_repr, name=id_tag + 'bias')
        else:
            if self.options.speaker_dropout > 0.0:
                l_color_drop = DropoutLayer(l_hidden_color,
                                            p=self.options.speaker_dropout,
                                            name=id_tag + 'color_drop')
            else:
                l_color_drop = l_hidden_color

            l_hidden = DenseLayer(
                l_color_drop,
                num_units=self.options.speaker_cell_size,
                nonlinearity=NONLINEARITIES[self.options.speaker_nonlinearity],
                name=id_tag + 'hidden')
            if self.options.speaker_dropout > 0.0:
                l_hidden_drop = DropoutLayer(l_hidden,
                                             p=self.options.speaker_dropout,
                                             name=id_tag + 'hidden_drop')
            else:
                l_hidden_drop = l_hidden

            l_scores = DenseLayer(l_hidden_drop,
                                  num_units=self.seq_vec.num_types,
                                  nonlinearity=None,
                                  name=id_tag + 'scores')
        l_out = NonlinearityLayer(l_scores,
                                  nonlinearity=softmax,
                                  name=id_tag + 'softmax')

        return l_out, color_inputs
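Side note, not from the original project: on a plain 2-D input of shape (batch, features), which is presumably what recurrent_length=0 yields here, NINLayer has no trailing axes to broadcast over and behaves exactly like a DenseLayer:

from lasagne.layers import InputLayer, NINLayer, get_output_shape

l_vec = InputLayer(shape=(None, 300))
print(get_output_shape(NINLayer(l_vec, num_units=128)))   # (None, 128)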
Example #8
def D_mnist_mode_recovery(
    num_channels    = 1,
    resolution      = 32,
    fmap_base       = 64,
    fmap_decay      = 1.0,
    fmap_max        = 256,
    mbstat_func     = 'Tstdeps',
    mbstat_avg      = None,         #'all',
    label_size      = 0,
    use_wscale      = False,
    use_gdrop       = False,
    use_layernorm   = False,
    use_batchnorm   = True,
    X               = 2,
    progressive     = False,
    **kwargs):

    R = int(np.log2(resolution))
    assert resolution == 2**R and resolution >= 4
    cur_lod = theano.shared(np.float32(0.0))
    gdrop_strength = theano.shared(np.float32(0.0))
    def nf(stage): return min(int(fmap_base / (2.0 ** (stage * fmap_decay))) // X, fmap_max)
    def GD(layer): return GDropLayer(layer, name=layer.name+'gd', mode='prop', strength=gdrop_strength) if use_gdrop else layer
    def LN(layer): return LayerNormLayer(layer, name=layer.name+'ln') if use_layernorm else layer
    def WS(layer): return WScaleLayer(layer, name=layer.name+'ws') if use_wscale else layer
    def BN(layer): return lasagne.layers.batch_norm(layer) if use_batchnorm else layer

    net = input_layer = InputLayer(name='Dimages', shape=[None, num_channels, 2**R, 2**R])
    for I in xrange(R-1, 1, -1): # I = R-1, R-2, ..., 2     (i.e. 4,3,2)
        net = BN(LN(WS(Conv2DLayer     (GD(net),     name='D%da'   % I, num_filters=nf(I-1), filter_size=3, pad=1, nonlinearity=lrelu, W=ilrelu))))
        net =       Downscale2DLayer(net,         name='D%ddn'  % I, scale_factor=2)
        if progressive:
            lod =       Downscale2DLayer(input_layer, name='D%dxs'  % (I-1), scale_factor=2**(R-I))
            lod =    WS(NINLayer        (lod,         name='D%dx'   % (I-1), num_units=nf(I-1), nonlinearity=lrelu, W=ilrelu))
            net =       LODSelectLayer  (             name='D%dlod' % (I-1), incomings=[net, lod], cur_lod=cur_lod, first_incoming_lod=R-I-1)

    if mbstat_avg is not None:
        net = MinibatchStatConcatLayer(net, name='Dstat', func=globals()[mbstat_func], averaging=mbstat_avg)

    net = FlattenLayer(GD(net), name='Dflatten')
    output_layers = [WS(DenseLayer(net, name='Dscores', num_units=1, nonlinearity=linear, W=ilinear))]

    if label_size:
        output_layers += [WS(DenseLayer(net, name='Dlabels', num_units=label_size, nonlinearity=linear, W=ilinear))]
    return dict(input_layers=[input_layer], output_layers=output_layers, cur_lod=cur_lod, gdrop_strength=gdrop_strength)
Example #9
    def _get_l_out(self, input_vars):
        check_options(self.options)
        id_tag = (self.id + '/') if self.id else ''

        prev_output_var, mask_var = input_vars[-2:]
        color_input_vars = input_vars[:-2]

        context_len = self.context_len if hasattr(self, 'context_len') else 1
        l_color_repr, color_inputs = self.color_vec.get_input_layer(
            color_input_vars,
            recurrent_length=self.seq_vec.max_len - 1,
            cell_size=self.options.speaker_cell_size,
            context_len=context_len,
            id=self.id)
        l_hidden_color = dimshuffle(l_color_repr, (0, 2, 1))
        for i in range(1, self.options.speaker_hidden_color_layers + 1):
            l_hidden_color = NINLayer(
                l_hidden_color,
                num_units=self.options.speaker_cell_size,
                nonlinearity=NONLINEARITIES[self.options.speaker_nonlinearity],
                name=id_tag + 'hidden_color%d' % i)
        l_hidden_color = dimshuffle(l_hidden_color, (0, 2, 1))

        l_prev_out = InputLayer(shape=(None, self.seq_vec.max_len - 1),
                                input_var=prev_output_var,
                                name=id_tag + 'prev_input')
        l_prev_embed = EmbeddingLayer(
            l_prev_out,
            input_size=len(self.seq_vec.tokens),
            output_size=self.options.speaker_cell_size,
            name=id_tag + 'prev_embed')
        l_in = ConcatLayer([l_hidden_color, l_prev_embed],
                           axis=2,
                           name=id_tag + 'color_prev')
        l_mask_in = InputLayer(shape=(None, self.seq_vec.max_len - 1),
                               input_var=mask_var,
                               name=id_tag + 'mask_input')
        l_rec_drop = l_in

        cell = CELLS[self.options.speaker_cell]
        cell_kwargs = {
            'mask_input':
            (None if self.options.speaker_no_mask else l_mask_in),
            'grad_clipping': self.options.speaker_grad_clipping,
            'num_units': self.options.speaker_cell_size,
        }
        if self.options.speaker_cell == 'LSTM':
            cell_kwargs['forgetgate'] = Gate(
                b=Constant(self.options.speaker_forget_bias))
        if self.options.speaker_cell != 'GRU':
            cell_kwargs['nonlinearity'] = NONLINEARITIES[
                self.options.speaker_nonlinearity]

        for i in range(1, self.options.speaker_recurrent_layers):
            l_rec = cell(l_rec_drop, name=id_tag + 'rec%d' % i, **cell_kwargs)
            if self.options.speaker_dropout > 0.0:
                l_rec_drop = DropoutLayer(l_rec,
                                          p=self.options.speaker_dropout,
                                          name=id_tag + 'rec%d_drop' % i)
            else:
                l_rec_drop = l_rec
        l_rec = cell(l_rec_drop,
                     name=id_tag +
                     'rec%d' % self.options.speaker_recurrent_layers,
                     **cell_kwargs)
        l_shape = ReshapeLayer(l_rec, (-1, self.options.speaker_cell_size),
                               name=id_tag + 'reshape')
        l_hidden_out = l_shape
        for i in range(1, self.options.speaker_hidden_out_layers + 1):
            l_hidden_out = DenseLayer(
                l_hidden_out,
                num_units=self.options.speaker_cell_size,
                nonlinearity=NONLINEARITIES[self.options.speaker_nonlinearity],
                name=id_tag + 'hidden_out%d' % i)
        l_softmax = DenseLayer(l_hidden_out,
                               num_units=len(self.seq_vec.tokens),
                               nonlinearity=softmax,
                               name=id_tag + 'softmax')
        l_out = ReshapeLayer(
            l_softmax,
            (-1, self.seq_vec.max_len - 1, len(self.seq_vec.tokens)),
            name=id_tag + 'out')

        return l_out, color_inputs + [l_prev_out, l_mask_in]
Example #10
    def _get_l_out(self, input_vars):
        check_options(self.options)
        id_tag = (self.id + '/') if self.id else ''

        input_var = input_vars[0]
        context_vars = input_vars[1:]

        l_in = InputLayer(shape=(None, self.seq_vec.max_len),
                          input_var=input_var,
                          name=id_tag + 'desc_input')
        l_in_embed = EmbeddingLayer(
            l_in,
            input_size=len(self.seq_vec.tokens),
            output_size=self.options.listener_cell_size,
            name=id_tag + 'desc_embed')

        # Context repr has shape (batch_size, seq_len, context_len * repr_size)
        l_context_repr, context_inputs = self.color_vec.get_input_layer(
            context_vars,
            recurrent_length=self.seq_vec.max_len,
            cell_size=self.options.listener_cell_size,
            context_len=self.context_len,
            id=self.id)
        l_context_repr = reshape(
            l_context_repr,
            ([0], [1], self.context_len, self.color_vec.output_size))
        l_hidden_context = dimshuffle(l_context_repr, (0, 3, 1, 2),
                                      name=id_tag + 'shuffle_in')
        for i in range(1, self.options.listener_hidden_color_layers + 1):
            l_hidden_context = NINLayer(
                l_hidden_context,
                num_units=self.options.listener_cell_size,
                nonlinearity=NONLINEARITIES[
                    self.options.listener_nonlinearity],
                b=Constant(0.1),
                name=id_tag + 'hidden_context%d' % i)
        l_pool = FeaturePoolLayer(l_hidden_context,
                                  pool_size=self.context_len,
                                  axis=3,
                                  pool_function=T.mean,
                                  name=id_tag + 'pool')
        l_pool_squeezed = reshape(l_pool, ([0], [1], [2]),
                                  name=id_tag + 'pool_squeezed')
        l_pool_shuffle = dimshuffle(l_pool_squeezed, (0, 2, 1),
                                    name=id_tag + 'shuffle_out')
        l_concat = ConcatLayer([l_pool_shuffle, l_in_embed],
                               axis=2,
                               name=id_tag + 'concat_inp_context')

        cell = CELLS[self.options.listener_cell]
        cell_kwargs = {
            'grad_clipping': self.options.listener_grad_clipping,
            'num_units': self.options.listener_cell_size,
        }
        if self.options.listener_cell == 'LSTM':
            cell_kwargs['forgetgate'] = Gate(
                b=Constant(self.options.listener_forget_bias))
        if self.options.listener_cell != 'GRU':
            cell_kwargs['nonlinearity'] = NONLINEARITIES[
                self.options.listener_nonlinearity]

        # l_rec1_drop = l_concat
        l_rec1 = cell(l_concat, name=id_tag + 'rec1', **cell_kwargs)
        if self.options.listener_dropout > 0.0:
            l_rec1_drop = DropoutLayer(l_rec1,
                                       p=self.options.listener_dropout,
                                       name=id_tag + 'rec1_drop')
        else:
            l_rec1_drop = l_rec1
        l_rec2 = cell(l_rec1_drop,
                      name=id_tag + 'rec2',
                      only_return_final=True,
                      **cell_kwargs)
        if self.options.listener_dropout > 0.0:
            l_rec2_drop = DropoutLayer(l_rec2,
                                       p=self.options.listener_dropout,
                                       name=id_tag + 'rec2_drop')
        else:
            l_rec2_drop = l_rec2

        l_rec2_drop = NINLayer(l_rec2_drop,
                               num_units=self.options.listener_cell_size,
                               nonlinearity=None,
                               name=id_tag + 'rec2_dense')

        # Context is fed into the RNN as one copy for each time step; just use
        # the first time step for output.
        # Input shape: (batch_size, repr_size, seq_len, context_len)
        # Output shape: (batch_size, repr_size, context_len)
        l_context_nonrec = SliceLayer(l_hidden_context,
                                      indices=0,
                                      axis=2,
                                      name=id_tag + 'context_nonrec')
        l_pool_nonrec = SliceLayer(l_pool_squeezed,
                                   indices=0,
                                   axis=2,
                                   name=id_tag + 'pool_nonrec')

        # Output shape: (batch_size, repr_size, context_len)
        l_sub = broadcast_sub_layer(
            l_pool_nonrec,
            l_context_nonrec,
            feature_dim=self.options.listener_cell_size,
            id_tag=id_tag)
        # Output shape: (batch_size, repr_size * 2, context_len)
        l_concat_sub = ConcatLayer([l_context_nonrec, l_sub],
                                   axis=1,
                                   name=id_tag + 'concat_inp_context')
        # Output shape: (batch_size, cell_size, context_len)
        l_hidden = NINLayer(l_concat_sub,
                            num_units=self.options.listener_cell_size,
                            nonlinearity=None,
                            name=id_tag + 'hidden')
        if self.options.listener_dropout > 0.0:
            l_hidden_drop = DropoutLayer(l_hidden,
                                         p=self.options.listener_dropout,
                                         name=id_tag + 'hidden_drop')
        else:
            l_hidden_drop = l_hidden

        l_dot = broadcast_dot_layer(
            l_rec2_drop,
            l_hidden_drop,
            feature_dim=self.options.listener_cell_size,
            id_tag=id_tag)
        l_dot_bias = l_dot  # BiasLayer(l_dot, name=id_tag + 'dot_bias')
        l_dot_clipped = NonlinearityLayer(
            l_dot_bias,
            nonlinearity=NONLINEARITIES[self.options.listener_nonlinearity],
            name=id_tag + 'dot_clipped')
        l_scores = NonlinearityLayer(l_dot_clipped,
                                     nonlinearity=softmax,
                                     name=id_tag + 'scores')

        return l_scores, [l_in] + context_inputs
Example #11
    def _get_l_out(self, input_vars):
        check_options(self.options)
        id_tag = (self.id + '/') if self.id else ''

        input_var = input_vars[0]
        context_vars = input_vars[1:]

        l_in = InputLayer(shape=(None, self.seq_vec.max_len),
                          input_var=input_var,
                          name=id_tag + 'desc_input')
        l_in_embed = EmbeddingLayer(
            l_in,
            input_size=len(self.seq_vec.tokens),
            output_size=self.options.listener_cell_size,
            name=id_tag + 'desc_embed')

        cell = CELLS[self.options.listener_cell]
        cell_kwargs = {
            'grad_clipping': self.options.listener_grad_clipping,
            'num_units': self.options.listener_cell_size,
        }
        if self.options.listener_cell == 'LSTM':
            cell_kwargs['forgetgate'] = Gate(
                b=Constant(self.options.listener_forget_bias))
        if self.options.listener_cell != 'GRU':
            cell_kwargs['nonlinearity'] = NONLINEARITIES[
                self.options.listener_nonlinearity]

        l_rec1 = cell(l_in_embed, name=id_tag + 'rec1', **cell_kwargs)
        if self.options.listener_dropout > 0.0:
            l_rec1_drop = DropoutLayer(l_rec1,
                                       p=self.options.listener_dropout,
                                       name=id_tag + 'rec1_drop')
        else:
            l_rec1_drop = l_rec1
        l_rec2 = cell(l_rec1_drop,
                      name=id_tag + 'rec2',
                      only_return_final=True,
                      **cell_kwargs)
        if self.options.listener_dropout > 0.0:
            l_rec2_drop = DropoutLayer(l_rec2,
                                       p=self.options.listener_dropout,
                                       name=id_tag + 'rec2_drop')
        else:
            l_rec2_drop = l_rec2
        # add only_return_final to l_rec1 and uncomment next line to remove second layer
        # l_rec2_drop = l_rec1_drop

        # Context repr has shape (batch_size, context_len * repr_size)
        l_context_repr, context_inputs = self.color_vec.get_input_layer(
            context_vars,
            cell_size=self.options.listener_cell_size,
            context_len=self.context_len,
            id=self.id)
        l_concat = ConcatLayer([l_context_repr, l_rec2_drop],
                               axis=1,
                               name=id_tag + 'concat_context_rec2')
        l_hidden_drop = l_concat
        for i in range(1, self.options.listener_hidden_color_layers + 1):
            l_hidden = NINLayer(l_hidden_drop,
                                num_units=self.options.listener_cell_size,
                                nonlinearity=NONLINEARITIES[
                                    self.options.listener_nonlinearity],
                                name=id_tag + 'hidden_combined%d' % i)
            if self.options.listener_dropout > 0.0:
                l_hidden_drop = DropoutLayer(l_hidden,
                                             p=self.options.listener_dropout,
                                             name=id_tag + 'hidden_drop')
            else:
                l_hidden_drop = l_hidden

        l_scores = DenseLayer(l_hidden_drop,
                              num_units=self.context_len,
                              nonlinearity=softmax,
                              name=id_tag + 'scores')

        return l_scores, [l_in] + context_inputs
Example #12
    def _get_l_out(self, input_vars, multi_utt='ignored'):
        check_options(self.options)
        id_tag = (self.id + '/') if self.id else ''

        input_var = input_vars[0]
        extra_vars = input_vars[1:]

        l_in = InputLayer(shape=(None, self.seq_vec.max_len), input_var=input_var,
                          name=id_tag + 'desc_input')
        l_in_embed, context_vars = self.get_embedding_layer(l_in, extra_vars)

        # Context repr has shape (batch_size, seq_len, context_len * repr_size)
        l_context_repr, context_inputs = self.color_vec.get_input_layer(
            context_vars,
            recurrent_length=self.seq_vec.max_len,
            cell_size=self.options.listener_cell_size,
            context_len=self.context_len,
            id=self.id
        )
        l_hidden_context = dimshuffle(l_context_repr, (0, 2, 1))
        for i in range(1, self.options.listener_hidden_color_layers + 1):
            l_hidden_context = NINLayer(
                l_hidden_context, num_units=self.options.listener_cell_size,
                nonlinearity=NONLINEARITIES[self.options.listener_nonlinearity],
                name=id_tag + 'hidden_context%d' % i)
        l_hidden_context = dimshuffle(l_hidden_context, (0, 2, 1))
        l_concat = ConcatLayer([l_hidden_context, l_in_embed], axis=2,
                               name=id_tag + 'concat_inp_context')

        cell = CELLS[self.options.listener_cell]
        cell_kwargs = {
            'grad_clipping': self.options.listener_grad_clipping,
            'num_units': self.options.listener_cell_size,
        }
        if self.options.listener_cell == 'LSTM':
            cell_kwargs['forgetgate'] = Gate(b=Constant(self.options.listener_forget_bias))
        if self.options.listener_cell != 'GRU':
            cell_kwargs['nonlinearity'] = NONLINEARITIES[self.options.listener_nonlinearity]

        l_rec1 = cell(l_concat, name=id_tag + 'rec1', **cell_kwargs)
        if self.options.listener_dropout > 0.0:
            l_rec1_drop = DropoutLayer(l_rec1, p=self.options.listener_dropout,
                                       name=id_tag + 'rec1_drop')
        else:
            l_rec1_drop = l_rec1
        l_rec2 = cell(l_rec1_drop, name=id_tag + 'rec2', **cell_kwargs)
        if self.options.listener_dropout > 0.0:
            l_rec2_drop = DropoutLayer(l_rec2, p=self.options.listener_dropout,
                                       name=id_tag + 'rec2_drop')
        else:
            l_rec2_drop = l_rec2

        l_hidden = DenseLayer(l_rec2_drop, num_units=self.options.listener_cell_size,
                              nonlinearity=NONLINEARITIES[self.options.listener_nonlinearity],
                              name=id_tag + 'hidden')
        if self.options.listener_dropout > 0.0:
            l_hidden_drop = DropoutLayer(l_hidden, p=self.options.listener_dropout,
                                         name=id_tag + 'hidden_drop')
        else:
            l_hidden_drop = l_hidden
        l_scores = DenseLayer(l_hidden_drop, num_units=self.context_len, nonlinearity=softmax,
                              name=id_tag + 'scores')

        return l_scores, [l_in] + context_inputs
Example #13
def inception_module(l_in,
                     num_1x1,
                     num_3x1_proj,
                     reduce_3x1,
                     num_3x1,
                     reduce_5x1,
                     num_5x1,
                     batch_norm=False,
                     gain=1.0,
                     bias=0.1,
                     nonlinearity=rectify):
    """
    Inception module for sequences
    :param l_in:
    :param num_1x1:
    :param num_3x1_proj:
    :param reduce_3x1:
    :param num_3x1:
    :param reduce_5x1:
    :param num_5x1:
    :param gain:
    :param bias:
    :return:
    """
    out_layers = []

    # 1x1
    if num_1x1 > 0:
        l_1x1 = NINLayer(l_in,
                         num_units=num_1x1,
                         W=lasagne.init.GlorotUniform(),
                         b=None,
                         nonlinearity=None,
                         name='inception_1x1')
        l_1x1_bn = BatchNormalizeLayer(l_1x1, batch_norm, nonlinearity)
        out_layers.append(l_1x1_bn)

    # 3x1
    if num_3x1 > 0:
        if reduce_3x1 > 0:
            l_reduce_3x1 = NINLayer(l_in,
                                    num_units=reduce_3x1,
                                    W=lasagne.init.GlorotUniform(),
                                    b=None,
                                    nonlinearity=None,
                                    name='inception_reduce_3x1')
            l_reduce_3x1_bn = BatchNormalizeLayer(l_reduce_3x1, batch_norm,
                                                  nonlinearity)
        else:
            l_reduce_3x1_bn = l_in
        l_3x1 = Conv2DLayer(l_reduce_3x1_bn,
                            num_filters=num_3x1,
                            filter_size=(3, 1),
                            pad="same",
                            W=lasagne.init.GlorotUniform(),
                            b=None,
                            nonlinearity=None,
                            name='inception_3x1')
        l_3x1_bn = BatchNormalizeLayer(l_3x1, batch_norm, nonlinearity)
        out_layers.append(l_3x1_bn)

    # 5x1
    if num_5x1 > 0:
        if reduce_5x1 > 0:
            l_reduce_5x1 = NINLayer(l_in,
                                    num_units=reduce_5x1,
                                    W=lasagne.init.GlorotUniform(),
                                    b=None,
                                    nonlinearity=None,
                                    name='inception_reduce_5x1')
            l_reduce_5x1_bn = BatchNormalizeLayer(l_reduce_5x1, batch_norm,
                                                  nonlinearity)
        else:
            l_reduce_5x1_bn = l_in

        l_5x1 = Conv2DLayer(l_reduce_5x1_bn,
                            num_filters=num_5x1,
                            filter_size=(3, 1),
                            pad="same",
                            W=lasagne.init.GlorotUniform(),
                            b=None,
                            nonlinearity=None,
                            name='inception_5x1/1')
        l_5x1_bn = BatchNormalizeLayer(l_5x1, batch_norm, nonlinearity)

        l_5x1 = Conv2DLayer(l_5x1_bn,
                            num_filters=num_5x1,
                            filter_size=(3, 1),
                            pad="same",
                            W=lasagne.init.GlorotUniform(),
                            b=None,
                            nonlinearity=None,
                            name='inception_5x1/2')
        l_5x1_bn = BatchNormalizeLayer(l_5x1, batch_norm, nonlinearity)
        out_layers.append(l_5x1_bn)

    if num_3x1_proj > 0:
        l_3x1_pool = MaxPool2DLayer(l_in,
                                    pool_size=(3, 1),
                                    stride=(1, 1),
                                    pad=(1, 0),
                                    name='inception_pool')
        l_3x1_proj = NINLayer(l_3x1_pool,
                              num_units=num_3x1_proj,
                              b=None,
                              nonlinearity=None,
                              name='inception_pool_proj')
        l_3x1_proj_bn = BatchNormalizeLayer(l_3x1_proj, batch_norm,
                                            nonlinearity)
        out_layers.append(l_3x1_proj_bn)

    # stack
    l_out = ConcatLayer(out_layers, axis=1, name='Inception module')
    return l_out
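A hedged usage sketch, assuming inception_module and its BatchNormalizeLayer helper are importable from the module above and that sequences are laid out as (batch, channels, time, 1) so the Nx1 filters slide over time:

from lasagne.layers import InputLayer, get_output_shape

l_seq = InputLayer(shape=(None, 64, 100, 1))
l_inc = inception_module(l_seq,
                         num_1x1=32, num_3x1_proj=16,
                         reduce_3x1=24, num_3x1=32,
                         reduce_5x1=8, num_5x1=16,
                         batch_norm=True)
# Channels concatenate across the four active paths: 32 + 32 + 16 + 16 = 96
print(get_output_shape(l_inc))   # expected: (None, 96, 100, 1)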