def get_embedding_layer(self, l_in, extra_vars):
    """Build a language-switched (bilingual) embedding of the description input.

    The first extra variable is the per-example language id; the rest are
    passed through unchanged as context variables.  One embedding +
    1x1-projection path is built per language (English / Chinese) and a
    SwitchLayer picks between them at runtime based on the language input.

    Returns a tuple ``(l_merged, context_vars)``.
    """
    language, context_vars = extra_vars[0], extra_vars[1:]
    id_tag = (self.id + '/') if self.id else ''

    l_lang = InputLayer(shape=(None, ), input_var=language,
                        name=id_tag + 'lang_input')

    def init_embeddings(embed_file):
        # Pretrained vectors when a file is configured (size taken from the
        # loaded matrix); otherwise random-normal init at the configured size.
        if embed_file:
            embeddings = load_embeddings(embed_file, self.seq_vec)
            return embeddings, embeddings.shape[1]
        return Normal(), self.options.bilingual_embed_size

    def embed_path(suffix, embeddings, embed_size):
        # Token embedding followed by a 1x1 (NIN) projection to the listener
        # cell size; the surrounding dimshuffles move the feature axis to
        # position 1, where NINLayer operates, and back again.
        l_embed = EmbeddingLayer(l_in, input_size=len(self.seq_vec.tokens),
                                 output_size=embed_size,
                                 W=embeddings,
                                 name=id_tag + 'desc_embed_' + suffix)
        l_proj = dimshuffle(l_embed, (0, 2, 1))
        l_proj = NINLayer(l_proj, num_units=self.options.listener_cell_size,
                          nonlinearity=None,
                          name=id_tag + 'desc_embed_' + suffix + '_transformed')
        return dimshuffle(l_proj, (0, 2, 1))

    en_W, en_size = init_embeddings(self.options.bilingual_en_embed_file)
    zh_W, zh_size = init_embeddings(self.options.bilingual_zh_embed_file)

    l_en_transformed = embed_path('en', en_W, en_size)
    l_zh_transformed = embed_path('zh', zh_W, zh_size)

    l_merged = SwitchLayer(l_lang, [l_en_transformed, l_zh_transformed],
                           name=id_tag + 'desc_embed_switch')
    return (l_merged, context_vars)
def D_paper(
    num_channels        = 1,            # Overridden based on dataset.
    resolution          = 32,           # Overridden based on dataset.
    label_size          = 0,            # Overridden based on dataset.
    fmap_base           = 4096,         # Controls feature-map counts via nf().
    fmap_decay          = 1.0,          # Exponent for per-stage feature-map falloff.
    fmap_max            = 256,          # Hard cap on feature maps per layer.
    mbstat_func         = 'Tstdeps',    # Name of the minibatch-stat function, looked up in globals().
    mbstat_avg          = 'all',        # Averaging mode for the stat layer; None disables it.
    mbdisc_kernels      = None,         # If truthy, adds minibatch discrimination with this many kernels.
    use_wscale          = True,         # Wrap layers in WScaleLayer.
    use_gdrop           = True,         # Feed layer inputs through GDropLayer (strength is a shared var).
    use_layernorm       = False,        # Wrap conv layers in LayerNormLayer.
    **kwargs):
    """Build a discriminator with per-resolution LOD-select branches.

    Resolution must be a power of two >= 4.  The network downsamples from
    ``resolution`` to 4x4, and at each stage a LODSelectLayer blends between
    the downsampled real input (through a 1x1 projection) and the learned
    pathway, controlled by the shared variable ``cur_lod``.

    Returns a dict with 'input_layers', 'output_layers' (score head, plus a
    label head when ``label_size`` > 0), and the shared variables 'cur_lod'
    and 'gdrop_strength' for the training loop to adjust.
    """
    R = int(np.log2(resolution))
    assert resolution == 2**R and resolution >= 4
    # Shared variables updated externally during training.
    cur_lod = theano.shared(np.float32(0.0))
    gdrop_strength = theano.shared(np.float32(0.0))

    # nf(stage): feature-map count for a stage, decayed and capped.
    def nf(stage): return min(int(fmap_base / (2.0 ** (stage * fmap_decay))), fmap_max)
    # Optional wrappers, each a no-op when its flag is off.
    def GD(layer): return GDropLayer(layer, name=layer.name+'gd', mode='prop', strength=gdrop_strength) if use_gdrop else layer
    def LN(layer): return LayerNormLayer(layer, name=layer.name+'ln') if use_layernorm else layer
    def WS(layer): return WScaleLayer(layer, name=layer.name+'ws') if use_wscale else layer

    input_layer = InputLayer(name='Dimages', shape=[None, num_channels, 2**R, 2**R])
    # 1x1 projection of the raw image into the first stage's feature space.
    net = WS(NINLayer(input_layer, name='D%dx' % (R-1), num_units=nf(R-1), nonlinearity=lrelu, W=ilrelu))

    for I in xrange(R-1, 1, -1): # I = R-1, R-2, ..., 2
        # Two 3x3 convs, then a 2x downscale.
        net = LN(WS(Conv2DLayer(GD(net), name='D%db' % I, num_filters=nf(I), filter_size=3, pad=1, nonlinearity=lrelu, W=ilrelu)))
        net = LN(WS(Conv2DLayer(GD(net), name='D%da' % I, num_filters=nf(I-1), filter_size=3, pad=1, nonlinearity=lrelu, W=ilrelu)))
        net = Downscale2DLayer(net, name='D%ddn' % I, scale_factor=2)
        # Alternate branch: the raw input downscaled to this stage's size,
        # projected to matching width; LODSelectLayer blends the two by cur_lod.
        lod = Downscale2DLayer(input_layer, name='D%dxs' % (I-1), scale_factor=2**(R-I))
        lod = WS(NINLayer(lod, name='D%dx' % (I-1), num_units=nf(I-1), nonlinearity=lrelu, W=ilrelu))
        net = LODSelectLayer(name='D%dlod' % (I-1), incomings=[net, lod], cur_lod=cur_lod, first_incoming_lod=R-I-1)

    if mbstat_avg is not None:
        # Concatenate minibatch statistics as an extra feature map.
        net = MinibatchStatConcatLayer(net, name='Dstat', func=globals()[mbstat_func], averaging=mbstat_avg)

    # Final 3x3 conv, then a 4x4 valid conv collapsing spatial extent.
    net = LN(WS(Conv2DLayer(GD(net), name='D1b', num_filters=nf(1), filter_size=3, pad=1, nonlinearity=lrelu, W=ilrelu)))
    net = LN(WS(Conv2DLayer(GD(net), name='D1a', num_filters=nf(0), filter_size=4, pad=0, nonlinearity=lrelu, W=ilrelu)))

    if mbdisc_kernels:
        import minibatch_discrimination
        net = minibatch_discrimination.MinibatchLayer(net, name='Dmd', num_kernels=mbdisc_kernels)

    # Linear score head; optional linear label head.
    output_layers = [WS(DenseLayer(net, name='Dscores', num_units=1, nonlinearity=linear, W=ilinear))]
    if label_size:
        output_layers += [WS(DenseLayer(net, name='Dlabels', num_units=label_size, nonlinearity=linear, W=ilinear))]
    return dict(input_layers=[input_layer], output_layers=output_layers, cur_lod=cur_lod, gdrop_strength=gdrop_strength)
def build_network():
    """Build the fixed 10-class classifier for (3, 32, 32) inputs.

    Same architecture as the original layer-by-layer version: Gaussian input
    noise, two stages of three same-padded 3x3 convs + 2x2 max-pool + 0.5
    dropout (128 then 256 filters), a valid 3x3 conv, two 1x1 NIN layers,
    global average pooling, and a 10-way softmax.  Every layer is wrapped
    with WN using momentum 0.999.
    """
    leaky = lasagne.nonlinearities.LeakyRectify(0.1)
    conv_defs = {
        'W': lasagne.init.HeNormal('relu'),
        'b': lasagne.init.Constant(0.0),
        'filter_size': (3, 3),
        'stride': (1, 1),
        'nonlinearity': leaky,
    }
    nin_defs = {
        'W': lasagne.init.HeNormal('relu'),
        'b': lasagne.init.Constant(0.0),
        'nonlinearity': leaky,
    }
    dense_defs = {
        'W': lasagne.init.HeNormal(1.0),
        'b': lasagne.init.Constant(0.0),
        'nonlinearity': lasagne.nonlinearities.softmax,
    }
    wn_defs = {'momentum': .999}

    net = InputLayer(name='input', shape=(None, 3, 32, 32))
    net = GaussianNoiseLayer(net, name='noise', sigma=.15)

    # Two identical conv stages, differing only in width.
    for stage, width in ((1, 128), (2, 256)):
        for tag in 'abc':
            net = WN(Conv2DLayer(net, name='conv%d%s' % (stage, tag),
                                 num_filters=width, pad='same', **conv_defs),
                     **wn_defs)
        net = MaxPool2DLayer(net, name='pool%d' % stage, pool_size=(2, 2))
        net = DropoutLayer(net, name='drop%d' % stage, p=.5)

    # Head: valid conv, two 1x1 reductions, global pool, softmax.
    net = WN(Conv2DLayer(net, name='conv3a', num_filters=512, pad=0, **conv_defs), **wn_defs)
    net = WN(NINLayer(net, name='conv3b', num_units=256, **nin_defs), **wn_defs)
    net = WN(NINLayer(net, name='conv3c', num_units=128, **nin_defs), **wn_defs)
    net = GlobalPoolLayer(net, name='pool3')
    net = WN(DenseLayer(net, name='dense', num_units=10, **dense_defs), **wn_defs)

    return net
def build_network(input_var, num_input_channels, num_classes):
    """Build a small classifier for (num_input_channels, 28, 28) inputs.

    Structure: Gaussian input noise, two conv stages (32- then 64-filter
    same-padded 3x3 convs, 2x2 max-pool, 0.5 dropout), a valid 3x3 conv,
    a 1x1 NIN layer, global average pooling, and a softmax dense head.
    All weighted layers are wrapped in WN with momentum taken from config.
    Commented-out layers below are disabled experiments kept for reference.

    :param input_var: Theano variable bound to the input layer.
    :param num_input_channels: number of image channels.
    :param num_classes: size of the softmax output.
    :return: the final Lasagne layer of the network.
    """
    conv_defs = {
        'W': lasagne.init.HeNormal('relu'),
        'b': lasagne.init.Constant(0.0),
        'filter_size': (3, 3),
        'stride': (1, 1),
        'nonlinearity': lasagne.nonlinearities.LeakyRectify(0.1)
    }
    nin_defs = {
        'W': lasagne.init.HeNormal('relu'),
        'b': lasagne.init.Constant(0.0),
        'nonlinearity': lasagne.nonlinearities.LeakyRectify(0.1)
    }
    dense_defs = {
        'W': lasagne.init.HeNormal(1.0),
        'b': lasagne.init.Constant(0.0),
        'nonlinearity': lasagne.nonlinearities.softmax
    }
    wn_defs = {
        'momentum': config.batch_normalization_momentum
    }

    net = InputLayer(name='input', shape=(None, num_input_channels, 28, 28), input_var=input_var)
    net = GaussianNoiseLayer(net, name='noise', sigma=config.augment_noise_stddev)

    # Stage 1: two convs (32, 64 filters), pool, dropout.
    net = WN(Conv2DLayer(net, name='conv1a', num_filters=32, pad='same', **conv_defs), **wn_defs)
    net = WN(Conv2DLayer(net, name='conv1b', num_filters=64, pad='same', **conv_defs), **wn_defs)
    # net = WN(Conv2DLayer (net, name='conv1c', num_filters=128, pad='same', **conv_defs), **wn_defs)
    net = MaxPool2DLayer(net, name='pool1', pool_size=(2, 2))
    net = DropoutLayer(net, name='drop1', p=.5)

    # Stage 2: same shape as stage 1.
    net = WN(Conv2DLayer(net, name='conv2a', num_filters=32, pad='same', **conv_defs), **wn_defs)
    net = WN(Conv2DLayer(net, name='conv2b', num_filters=64, pad='same', **conv_defs), **wn_defs)
    # net = WN(Conv2DLayer (net, name='conv2c', num_filters=256, pad='same', **conv_defs), **wn_defs)
    net = MaxPool2DLayer(net, name='pool2', pool_size=(2, 2))
    net = DropoutLayer(net, name='drop2', p=.5)

    # Head: valid conv, 1x1 NIN, global pool, softmax.
    net = WN(Conv2DLayer(net, name='conv3a', num_filters=32, pad=0, **conv_defs), **wn_defs)
    # net = WN(NINLayer (net, name='conv3b', num_units=256, **nin_defs), **wn_defs)
    net = WN(NINLayer(net, name='conv3c', num_units=256, **nin_defs), **wn_defs)
    net = GlobalPoolLayer(net, name='pool3')
    net = WN(DenseLayer(net, name='dense', num_units=num_classes, **dense_defs), **wn_defs)

    # Disabled alternative MLP head, kept for reference:
    # net = GaussianNoiseLayer(net, name='noise', sigma=config.augment_noise_stddev)
    # net = WN(DenseLayer (net, name='dense1', num_units=256, **dense_defs), **wn_defs)
    # net = DropoutLayer (net, name='drop1', p=.5)
    # net = WN(DenseLayer (net, name='dense2', num_units=256, **dense_defs), **wn_defs)
    # net = DropoutLayer (net, name='drop2', p=.5)
    # net = WN(DenseLayer (net, name='dense3', num_units=256, **dense_defs), **wn_defs)
    # net = WN(DenseLayer (net, name='dense4', num_units=num_classes, **dense_defs), **wn_defs)

    return net
def nin_layer(cls, cur_layer, name, num_units, dilation=1, nonlinearity=lasagne.nonlinearities.rectify):
    """Append a 1x1 (network-in-network) layer to *cur_layer*.

    The dilation value is not used by the layer itself; it is passed through
    unchanged so callers can thread it alongside the layer chain.

    :return: tuple of (new layer, dilation).
    """
    projected = NINLayer(cur_layer, num_units=num_units,
                         nonlinearity=nonlinearity, name=name)
    return projected, dilation
def G_paper(
    num_channels        = 1,            # Overridden based on dataset.
    resolution          = 32,           # Overridden based on dataset.
    label_size          = 0,            # Overridden based on dataset.
    fmap_base           = 4096,         # Controls feature-map counts via nf().
    fmap_decay          = 1.0,          # Exponent for per-stage feature-map falloff.
    fmap_max            = 256,          # Hard cap on feature maps per layer.
    latent_size         = None,         # Defaults to nf(0) when None.
    normalize_latents   = True,         # Pixel-normalize the latent vector first.
    use_wscale          = True,         # Wrap layers in WScaleLayer.
    use_pixelnorm       = True,         # Wrap conv layers in PixelNormLayer.
    use_leakyrelu       = True,         # lrelu/ilrelu vs. relu/irelu activations.
    use_batchnorm       = False,        # Wrap conv layers in batch_norm.
    tanh_at_end         = None,         # If set, apply tanh (scaled unless == 1.0) to the output.
    **kwargs):
    """Build a generator that grows from 4x4 up to ``resolution``.

    Resolution must be a power of two >= 4.  Each upscaling stage's output is
    projected to ``num_channels`` by a 1x1 layer, and a LODSelectLayer picks
    among these per-resolution outputs based on the shared variable
    ``cur_lod`` (which the training loop adjusts).

    Returns a dict with 'input_layers' (latents, plus labels when
    ``label_size`` > 0), 'output_layers' (single selected output), and
    'cur_lod'.
    """
    R = int(np.log2(resolution))
    assert resolution == 2**R and resolution >= 4
    cur_lod = theano.shared(np.float32(0.0))

    # nf(stage): feature-map count for a stage, decayed and capped.
    def nf(stage): return min(int(fmap_base / (2.0 ** (stage * fmap_decay))), fmap_max)
    # Optional wrappers, each a no-op when its flag is off.
    def PN(layer): return PixelNormLayer(layer, name=layer.name+'pn') if use_pixelnorm else layer
    def BN(layer): return lasagne.layers.batch_norm(layer) if use_batchnorm else layer
    def WS(layer): return WScaleLayer(layer, name=layer.name+'S') if use_wscale else layer

    if latent_size is None: latent_size = nf(0)
    (act, iact) = (lrelu, ilrelu) if use_leakyrelu else (relu, irelu)

    input_layers = [InputLayer(name='Glatents', shape=[None, latent_size])]
    net = input_layers[-1]
    if normalize_latents:
        net = PixelNormLayer(net, name='Glnorm')
    if label_size:
        # Conditional variant: concatenate label input onto the latents.
        input_layers += [InputLayer(name='Glabels', shape=[None, label_size])]
        net = ConcatLayer(name='Gina', incomings=[net, input_layers[-1]])

    # Reshape the flat latent vector to a 1x1 spatial map, then expand to 4x4
    # with a 'full'-padded 4x4 conv followed by a 3x3 conv.
    net = ReshapeLayer(name='Ginb', incoming=net, shape=[[0], [1], 1, 1])
    net = PN(BN(WS(Conv2DLayer(net, name='G1a', num_filters=nf(1), filter_size=4, pad='full', nonlinearity=act, W=iact))))
    net = PN(BN(WS(Conv2DLayer(net, name='G1b', num_filters=nf(1), filter_size=3, pad=1, nonlinearity=act, W=iact))))
    lods = [net]

    for I in xrange(2, R): # I = 2, 3, ..., R-1
        # Each stage: 2x upscale then two 3x3 convs; record the stage output.
        net = Upscale2DLayer(net, name='G%dup' % I, scale_factor=2)
        net = PN(BN(WS(Conv2DLayer(net, name='G%da' % I, num_filters=nf(I), filter_size=3, pad=1, nonlinearity=act, W=iact))))
        net = PN(BN(WS(Conv2DLayer(net, name='G%db' % I, num_filters=nf(I), filter_size=3, pad=1, nonlinearity=act, W=iact))))
        lods += [net]

    # 1x1 projection of each stage output to image channels; reversed so that
    # index 0 is the highest resolution, matching first_incoming_lod=0.
    lods = [WS(NINLayer(l, name='Glod%d' % i, num_units=num_channels,
                        nonlinearity=linear, W=ilinear)) for i, l in enumerate(reversed(lods))]
    output_layer = LODSelectLayer(name='Glod', incomings=lods, cur_lod=cur_lod, first_incoming_lod=0)

    if tanh_at_end is not None:
        output_layer = NonlinearityLayer(output_layer, name='Gtanh', nonlinearity=tanh)
        if tanh_at_end != 1.0:
            # Fixed (non-trainable) scaling of the tanh output.
            output_layer = non_trainable(ScaleLayer(output_layer, name='Gtanhs',
                                                    scales=lasagne.init.Constant(tanh_at_end)))
    return dict(input_layers=input_layers, output_layers=[output_layer], cur_lod=cur_lod)
def _get_l_out(self, input_vars):
    """Build the output network for the feed-forward (non-recurrent) speaker.

    Fix vs. original: removed the redundant no-op statement
    ``l_hidden_color = l_hidden_color`` that followed the hidden-color loop.

    :param input_vars: Theano variables handed to the color vectorizer.
    :return: tuple of (final softmax layer, list of color input layers).
    """
    id_tag = (self.id + '/') if self.id else ''

    # Fall back to vocabulary size when no cell size is configured.
    cell_size = self.options.speaker_cell_size or self.seq_vec.num_types
    l_color_repr, color_inputs = self.color_vec.get_input_layer(
        input_vars,
        recurrent_length=0,
        cell_size=cell_size,
        id=self.id)

    # Optional stack of 1x1 layers over the color representation.
    l_hidden_color = l_color_repr
    for i in range(1, self.options.speaker_hidden_color_layers + 1):
        l_hidden_color = NINLayer(
            l_hidden_color, num_units=cell_size,
            nonlinearity=NONLINEARITIES[self.options.speaker_nonlinearity],
            name=id_tag + 'hidden_color%d' % i)

    if self.options.speaker_cell_size == 0:
        # Degenerate case: score directly from the color representation.
        # NOTE(review): this bypasses the hidden_color stack built above —
        # confirm that is intended when speaker_cell_size == 0.
        l_scores = l_color_repr  # BiasLayer(l_color_repr, name=id_tag + 'bias')
    else:
        if self.options.speaker_dropout > 0.0:
            l_color_drop = DropoutLayer(l_hidden_color,
                                        p=self.options.speaker_dropout,
                                        name=id_tag + 'color_drop')
        else:
            l_color_drop = l_hidden_color

        l_hidden = DenseLayer(
            l_color_drop, num_units=self.options.speaker_cell_size,
            nonlinearity=NONLINEARITIES[self.options.speaker_nonlinearity],
            name=id_tag + 'hidden')
        if self.options.speaker_dropout > 0.0:
            l_hidden_drop = DropoutLayer(l_hidden,
                                         p=self.options.speaker_dropout,
                                         name=id_tag + 'hidden_drop')
        else:
            l_hidden_drop = l_hidden

        # Linear scores over the vocabulary; softmax applied below.
        l_scores = DenseLayer(l_hidden_drop, num_units=self.seq_vec.num_types,
                              nonlinearity=None, name=id_tag + 'scores')

    l_out = NonlinearityLayer(l_scores, nonlinearity=softmax,
                              name=id_tag + 'softmax')
    return l_out, color_inputs
def D_mnist_mode_recovery(
    num_channels        = 1,
    resolution          = 32,
    fmap_base           = 64,
    fmap_decay          = 1.0,
    fmap_max            = 256,
    mbstat_func         = 'Tstdeps',    # Name of the minibatch-stat function, looked up in globals().
    mbstat_avg          = None,         # 'all'  (stat layer disabled when None)
    label_size          = 0,
    use_wscale          = False,        # Wrap layers in WScaleLayer.
    use_gdrop           = False,        # Feed layer inputs through GDropLayer.
    use_layernorm       = False,        # Wrap conv layers in LayerNormLayer.
    use_batchnorm       = True,         # Wrap conv layers in batch_norm.
    X                   = 2,            # Extra divisor on feature-map counts.
    progressive         = False,        # Enable per-stage LOD-select branches.
    **kwargs):
    """Build a small discriminator variant for mode-recovery experiments.

    One 3x3 conv + 2x downscale per stage (instead of two convs), an extra
    ``X`` divisor on feature-map counts, and LOD blending only when
    ``progressive`` is set.  Resolution must be a power of two >= 4.

    Returns a dict with 'input_layers', 'output_layers' (score head plus an
    optional label head), and the shared variables 'cur_lod' and
    'gdrop_strength'.
    """
    R = int(np.log2(resolution))
    assert resolution == 2**R and resolution >= 4
    # Shared variables updated externally during training.
    cur_lod = theano.shared(np.float32(0.0))
    gdrop_strength = theano.shared(np.float32(0.0))

    # nf(stage): decayed, X-divided, capped feature-map count.
    def nf(stage): return min(int(fmap_base / (2.0 ** (stage * fmap_decay))) // X, fmap_max)
    # Optional wrappers, each a no-op when its flag is off.
    def GD(layer): return GDropLayer(layer, name=layer.name+'gd', mode='prop', strength=gdrop_strength) if use_gdrop else layer
    def LN(layer): return LayerNormLayer(layer, name=layer.name+'ln') if use_layernorm else layer
    def WS(layer): return WScaleLayer(layer, name=layer.name+'ws') if use_wscale else layer
    def BN(layer): return lasagne.layers.batch_norm(layer) if use_batchnorm else layer

    net = input_layer = InputLayer(name='Dimages', shape=[None, num_channels, 2**R, 2**R])

    for I in xrange(R-1, 1, -1): # I = R-1, R-2, ..., 2 (i.e. 4,3,2)
        # Single 3x3 conv, then 2x downscale.
        net = BN(LN(WS(Conv2DLayer(GD(net), name='D%da' % I, num_filters=nf(I-1), filter_size=3, pad=1, nonlinearity=lrelu, W=ilrelu))))
        net = Downscale2DLayer(net, name='D%ddn' % I, scale_factor=2)
        if progressive:
            # Alternate branch from the downscaled raw input, blended by cur_lod.
            lod = Downscale2DLayer(input_layer, name='D%dxs' % (I-1), scale_factor=2**(R-I))
            lod = WS(NINLayer(lod, name='D%dx' % (I-1), num_units=nf(I-1), nonlinearity=lrelu, W=ilrelu))
            net = LODSelectLayer(name='D%dlod' % (I-1), incomings=[net, lod], cur_lod=cur_lod, first_incoming_lod=R-I-1)

    if mbstat_avg is not None:
        net = MinibatchStatConcatLayer(net, name='Dstat', func=globals()[mbstat_func], averaging=mbstat_avg)

    # Flatten and score; no final conv stack in this variant.
    net = FlattenLayer(GD(net), name='Dflatten')

    output_layers = [WS(DenseLayer(net, name='Dscores', num_units=1, nonlinearity=linear, W=ilinear))]
    if label_size:
        output_layers += [WS(DenseLayer(net, name='Dlabels', num_units=label_size, nonlinearity=linear, W=ilinear))]
    return dict(input_layers=[input_layer], output_layers=output_layers, cur_lod=cur_lod, gdrop_strength=gdrop_strength)
def _get_l_out(self, input_vars):
    """Build the output network for the recurrent speaker.

    The last two input variables are the previous-output token ids and the
    sequence mask; all preceding ones go to the color vectorizer.  The color
    representation (repeated across time steps) is concatenated with the
    embedded previous tokens and fed through a stack of recurrent cells,
    ending in a per-time-step softmax over the vocabulary.

    :return: tuple of (output layer producing
        (batch, max_len - 1, vocab)-shaped probabilities, list of input
        layers: color inputs + [previous-output, mask]).
    """
    check_options(self.options)
    id_tag = (self.id + '/') if self.id else ''

    prev_output_var, mask_var = input_vars[-2:]
    color_input_vars = input_vars[:-2]

    # Older configs may lack context_len; default to a single color context.
    context_len = self.context_len if hasattr(self, 'context_len') else 1
    l_color_repr, color_inputs = self.color_vec.get_input_layer(
        color_input_vars,
        recurrent_length=self.seq_vec.max_len - 1,
        cell_size=self.options.speaker_cell_size,
        context_len=context_len,
        id=self.id)

    # Move features to axis 1 for NINLayer, apply the hidden-color stack,
    # then move them back to the trailing axis.
    l_hidden_color = dimshuffle(l_color_repr, (0, 2, 1))
    for i in range(1, self.options.speaker_hidden_color_layers + 1):
        l_hidden_color = NINLayer(
            l_hidden_color, num_units=self.options.speaker_cell_size,
            nonlinearity=NONLINEARITIES[self.options.speaker_nonlinearity],
            name=id_tag + 'hidden_color%d' % i)
    l_hidden_color = dimshuffle(l_hidden_color, (0, 2, 1))

    # Previous-output tokens, embedded to the cell size.
    l_prev_out = InputLayer(shape=(None, self.seq_vec.max_len - 1),
                            input_var=prev_output_var,
                            name=id_tag + 'prev_input')
    l_prev_embed = EmbeddingLayer(
        l_prev_out, input_size=len(self.seq_vec.tokens),
        output_size=self.options.speaker_cell_size,
        name=id_tag + 'prev_embed')

    # Per-time-step concatenation of color features and token embeddings.
    l_in = ConcatLayer([l_hidden_color, l_prev_embed], axis=2,
                       name=id_tag + 'color_prev')
    l_mask_in = InputLayer(shape=(None, self.seq_vec.max_len - 1),
                           input_var=mask_var,
                           name=id_tag + 'mask_input')
    l_rec_drop = l_in

    cell = CELLS[self.options.speaker_cell]
    cell_kwargs = {
        'mask_input': (None if self.options.speaker_no_mask else l_mask_in),
        'grad_clipping': self.options.speaker_grad_clipping,
        'num_units': self.options.speaker_cell_size,
    }
    if self.options.speaker_cell == 'LSTM':
        # Positive forget-gate bias configured per options.
        cell_kwargs['forgetgate'] = Gate(
            b=Constant(self.options.speaker_forget_bias))
    if self.options.speaker_cell != 'GRU':
        cell_kwargs['nonlinearity'] = NONLINEARITIES[
            self.options.speaker_nonlinearity]

    # All recurrent layers except the last get optional dropout between them.
    for i in range(1, self.options.speaker_recurrent_layers):
        l_rec = cell(l_rec_drop, name=id_tag + 'rec%d' % i, **cell_kwargs)
        if self.options.speaker_dropout > 0.0:
            l_rec_drop = DropoutLayer(l_rec, p=self.options.speaker_dropout,
                                      name=id_tag + 'rec%d_drop' % i)
        else:
            l_rec_drop = l_rec
    l_rec = cell(l_rec_drop,
                 name=id_tag + 'rec%d' % self.options.speaker_recurrent_layers,
                 **cell_kwargs)

    # Collapse (batch, time) so dense layers apply per time step.
    l_shape = ReshapeLayer(l_rec, (-1, self.options.speaker_cell_size),
                           name=id_tag + 'reshape')
    l_hidden_out = l_shape
    for i in range(1, self.options.speaker_hidden_out_layers + 1):
        l_hidden_out = DenseLayer(
            l_hidden_out, num_units=self.options.speaker_cell_size,
            nonlinearity=NONLINEARITIES[self.options.speaker_nonlinearity],
            name=id_tag + 'hidden_out%d' % i)
    l_softmax = DenseLayer(l_hidden_out, num_units=len(self.seq_vec.tokens),
                           nonlinearity=softmax, name=id_tag + 'softmax')
    # Restore the (batch, time, vocab) shape.
    l_out = ReshapeLayer(
        l_softmax, (-1, self.seq_vec.max_len - 1, len(self.seq_vec.tokens)),
        name=id_tag + 'out')

    return l_out, color_inputs + [l_prev_out, l_mask_in]
def _get_l_out(self, input_vars):
    """Build the output network for the pooling/dot-product listener.

    Embeds the description, processes the color context through 1x1 layers
    and a mean pool over the context axis, runs a two-layer recurrent encoder
    over the concatenated sequence, and scores each context element by a
    (broadcast) dot product between the final recurrent state and per-element
    context features, followed by a softmax.

    :return: tuple of (softmax score layer over context elements,
        [description input layer] + context input layers).
    """
    check_options(self.options)
    id_tag = (self.id + '/') if self.id else ''

    input_var = input_vars[0]
    context_vars = input_vars[1:]

    l_in = InputLayer(shape=(None, self.seq_vec.max_len), input_var=input_var,
                      name=id_tag + 'desc_input')
    l_in_embed = EmbeddingLayer(l_in, input_size=len(self.seq_vec.tokens),
                                output_size=self.options.listener_cell_size,
                                name=id_tag + 'desc_embed')

    # Context repr has shape (batch_size, seq_len, context_len * repr_size)
    l_context_repr, context_inputs = self.color_vec.get_input_layer(
        context_vars,
        recurrent_length=self.seq_vec.max_len,
        cell_size=self.options.listener_cell_size,
        context_len=self.context_len,
        id=self.id)
    # Split the flat context axis into (context_len, repr_size), then move
    # features to axis 1 so NINLayer operates over them.
    l_context_repr = reshape(l_context_repr, ([0], [1], self.context_len,
                                              self.color_vec.output_size))
    l_hidden_context = dimshuffle(l_context_repr, (0, 3, 1, 2),
                                  name=id_tag + 'shuffle_in')
    for i in range(1, self.options.listener_hidden_color_layers + 1):
        l_hidden_context = NINLayer(
            l_hidden_context, num_units=self.options.listener_cell_size,
            nonlinearity=NONLINEARITIES[self.options.listener_nonlinearity],
            b=Constant(0.1),
            name=id_tag + 'hidden_context%d' % i)

    # Mean-pool over the context axis, drop the resulting singleton, and put
    # time first so it can be concatenated with the embedded description.
    l_pool = FeaturePoolLayer(l_hidden_context, pool_size=self.context_len,
                              axis=3, pool_function=T.mean,
                              name=id_tag + 'pool')
    l_pool_squeezed = reshape(l_pool, ([0], [1], [2]),
                              name=id_tag + 'pool_squeezed')
    l_pool_shuffle = dimshuffle(l_pool_squeezed, (0, 2, 1),
                                name=id_tag + 'shuffle_out')
    l_concat = ConcatLayer([l_pool_shuffle, l_in_embed], axis=2,
                           name=id_tag + 'concat_inp_context')

    cell = CELLS[self.options.listener_cell]
    cell_kwargs = {
        'grad_clipping': self.options.listener_grad_clipping,
        'num_units': self.options.listener_cell_size,
    }
    if self.options.listener_cell == 'LSTM':
        cell_kwargs['forgetgate'] = Gate(
            b=Constant(self.options.listener_forget_bias))
    if self.options.listener_cell != 'GRU':
        cell_kwargs['nonlinearity'] = NONLINEARITIES[
            self.options.listener_nonlinearity]

    # l_rec1_drop = l_concat
    l_rec1 = cell(l_concat, name=id_tag + 'rec1', **cell_kwargs)
    if self.options.listener_dropout > 0.0:
        l_rec1_drop = DropoutLayer(l_rec1, p=self.options.listener_dropout,
                                   name=id_tag + 'rec1_drop')
    else:
        l_rec1_drop = l_rec1
    # Second recurrent layer keeps only the final state.
    l_rec2 = cell(l_rec1_drop, name=id_tag + 'rec2', only_return_final=True,
                  **cell_kwargs)
    if self.options.listener_dropout > 0.0:
        l_rec2_drop = DropoutLayer(l_rec2, p=self.options.listener_dropout,
                                   name=id_tag + 'rec2_drop')
    else:
        l_rec2_drop = l_rec2
    # Linear projection of the final recurrent state.
    l_rec2_drop = NINLayer(l_rec2_drop,
                           num_units=self.options.listener_cell_size,
                           nonlinearity=None, name=id_tag + 'rec2_dense')

    # Context is fed into the RNN as one copy for each time step; just use
    # the first time step for output.
    # Input shape: (batch_size, repr_size, seq_len, context_len)
    # Output shape: (batch_size, repr_size, context_len)
    l_context_nonrec = SliceLayer(l_hidden_context, indices=0, axis=2,
                                  name=id_tag + 'context_nonrec')
    l_pool_nonrec = SliceLayer(l_pool_squeezed, indices=0, axis=2,
                               name=id_tag + 'pool_nonrec')

    # Output shape: (batch_size, repr_size, context_len)
    l_sub = broadcast_sub_layer(
        l_pool_nonrec, l_context_nonrec,
        feature_dim=self.options.listener_cell_size,
        id_tag=id_tag)
    # Output shape: (batch_size, repr_size * 2, context_len)
    l_concat_sub = ConcatLayer([l_context_nonrec, l_sub], axis=1,
                               name=id_tag + 'concat_inp_context')
    # Output shape: (batch_size, cell_size, context_len)
    l_hidden = NINLayer(l_concat_sub,
                        num_units=self.options.listener_cell_size,
                        nonlinearity=None, name=id_tag + 'hidden')
    if self.options.listener_dropout > 0.0:
        l_hidden_drop = DropoutLayer(l_hidden, p=self.options.listener_dropout,
                                     name=id_tag + 'hidden_drop')
    else:
        l_hidden_drop = l_hidden

    # Dot product between the encoded description and each context element.
    l_dot = broadcast_dot_layer(
        l_rec2_drop, l_hidden_drop,
        feature_dim=self.options.listener_cell_size,
        id_tag=id_tag)
    l_dot_bias = l_dot  # BiasLayer(l_dot, name=id_tag + 'dot_bias')
    l_dot_clipped = NonlinearityLayer(
        l_dot_bias,
        nonlinearity=NONLINEARITIES[self.options.listener_nonlinearity],
        name=id_tag + 'dot_clipped')
    l_scores = NonlinearityLayer(l_dot_clipped, nonlinearity=softmax,
                                 name=id_tag + 'scores')

    return l_scores, [l_in] + context_inputs
def _get_l_out(self, input_vars):
    """Build the output network for the plain recurrent listener.

    Encodes the description with a two-layer recurrent stack, concatenates
    the final state with the flat color-context representation, applies an
    optional stack of 1x1 hidden layers with dropout, and scores the context
    elements with a softmax dense layer.  Wiring is identical to the
    original implementation.

    :return: tuple of (softmax score layer,
        [description input layer] + context input layers).
    """
    check_options(self.options)
    opts = self.options
    id_tag = (self.id + '/') if self.id else ''

    def maybe_dropout(layer, tag):
        # Apply dropout only when a nonzero rate is configured.
        if opts.listener_dropout > 0.0:
            return DropoutLayer(layer, p=opts.listener_dropout,
                                name=id_tag + tag)
        return layer

    input_var, context_vars = input_vars[0], input_vars[1:]

    l_in = InputLayer(shape=(None, self.seq_vec.max_len), input_var=input_var,
                      name=id_tag + 'desc_input')
    l_in_embed = EmbeddingLayer(l_in, input_size=len(self.seq_vec.tokens),
                                output_size=opts.listener_cell_size,
                                name=id_tag + 'desc_embed')

    cell = CELLS[opts.listener_cell]
    cell_kwargs = {
        'grad_clipping': opts.listener_grad_clipping,
        'num_units': opts.listener_cell_size,
    }
    if opts.listener_cell == 'LSTM':
        cell_kwargs['forgetgate'] = Gate(b=Constant(opts.listener_forget_bias))
    if opts.listener_cell != 'GRU':
        cell_kwargs['nonlinearity'] = NONLINEARITIES[opts.listener_nonlinearity]

    l_rec1_drop = maybe_dropout(
        cell(l_in_embed, name=id_tag + 'rec1', **cell_kwargs), 'rec1_drop')
    l_rec2_drop = maybe_dropout(
        cell(l_rec1_drop, name=id_tag + 'rec2', only_return_final=True,
             **cell_kwargs),
        'rec2_drop')
    # add only_return_final to l_rec1 and uncomment next line to remove second layer
    # l_rec2_drop = l_rec1_drop

    # Context repr has shape (batch_size, context_len * repr_size)
    l_context_repr, context_inputs = self.color_vec.get_input_layer(
        context_vars,
        cell_size=opts.listener_cell_size,
        context_len=self.context_len,
        id=self.id)
    l_concat = ConcatLayer([l_context_repr, l_rec2_drop], axis=1,
                           name=id_tag + 'concat_context_rec2')

    l_hidden_drop = l_concat
    for i in range(1, opts.listener_hidden_color_layers + 1):
        l_hidden = NINLayer(
            l_hidden_drop, num_units=opts.listener_cell_size,
            nonlinearity=NONLINEARITIES[opts.listener_nonlinearity],
            name=id_tag + 'hidden_combined%d' % i)
        l_hidden_drop = maybe_dropout(l_hidden, 'hidden_drop')

    l_scores = DenseLayer(l_hidden_drop, num_units=self.context_len,
                          nonlinearity=softmax, name=id_tag + 'scores')
    return l_scores, [l_in] + context_inputs
def _get_l_out(self, input_vars, multi_utt='ignored'):
    """Build the output network for the bilingual listener.

    The description input goes through the language-switched embedding from
    ``get_embedding_layer`` (the first extra variable is the language id);
    the color context goes through a stack of 1x1 layers.  Both are
    concatenated along the time axis's feature dimension, encoded by two
    recurrent layers, and scored over the context with a softmax.

    :param multi_utt: accepted for interface compatibility; not used.
    :return: tuple of (softmax score layer,
        [description input layer] + context input layers).
    """
    check_options(self.options)
    id_tag = (self.id + '/') if self.id else ''

    input_var = input_vars[0]
    extra_vars = input_vars[1:]

    l_in = InputLayer(shape=(None, self.seq_vec.max_len), input_var=input_var,
                      name=id_tag + 'desc_input')
    l_in_embed, context_vars = self.get_embedding_layer(l_in, extra_vars)

    # Context repr has shape (batch_size, seq_len, context_len * repr_size)
    l_context_repr, context_inputs = self.color_vec.get_input_layer(
        context_vars,
        recurrent_length=self.seq_vec.max_len,
        cell_size=self.options.listener_cell_size,
        context_len=self.context_len,
        id=self.id
    )
    # Move features to axis 1 for the NIN stack, then back.
    l_hidden_context = dimshuffle(l_context_repr, (0, 2, 1))
    for i in range(1, self.options.listener_hidden_color_layers + 1):
        l_hidden_context = NINLayer(
            l_hidden_context, num_units=self.options.listener_cell_size,
            nonlinearity=NONLINEARITIES[self.options.listener_nonlinearity],
            name=id_tag + 'hidden_context%d' % i)
    l_hidden_context = dimshuffle(l_hidden_context, (0, 2, 1))
    l_concat = ConcatLayer([l_hidden_context, l_in_embed], axis=2,
                           name=id_tag + 'concat_inp_context')

    cell = CELLS[self.options.listener_cell]
    cell_kwargs = {
        'grad_clipping': self.options.listener_grad_clipping,
        'num_units': self.options.listener_cell_size,
    }
    if self.options.listener_cell == 'LSTM':
        cell_kwargs['forgetgate'] = Gate(b=Constant(self.options.listener_forget_bias))
    if self.options.listener_cell != 'GRU':
        cell_kwargs['nonlinearity'] = NONLINEARITIES[self.options.listener_nonlinearity]

    # Two recurrent layers with optional dropout between and after.
    l_rec1 = cell(l_concat, name=id_tag + 'rec1', **cell_kwargs)
    if self.options.listener_dropout > 0.0:
        l_rec1_drop = DropoutLayer(l_rec1, p=self.options.listener_dropout,
                                   name=id_tag + 'rec1_drop')
    else:
        l_rec1_drop = l_rec1
    l_rec2 = cell(l_rec1_drop, name=id_tag + 'rec2', **cell_kwargs)
    if self.options.listener_dropout > 0.0:
        l_rec2_drop = DropoutLayer(l_rec2, p=self.options.listener_dropout,
                                   name=id_tag + 'rec2_drop')
    else:
        l_rec2_drop = l_rec2

    l_hidden = DenseLayer(l_rec2_drop,
                          num_units=self.options.listener_cell_size,
                          nonlinearity=NONLINEARITIES[self.options.listener_nonlinearity],
                          name=id_tag + 'hidden')
    if self.options.listener_dropout > 0.0:
        l_hidden_drop = DropoutLayer(l_hidden, p=self.options.listener_dropout,
                                     name=id_tag + 'hidden_drop')
    else:
        l_hidden_drop = l_hidden
    l_scores = DenseLayer(l_hidden_drop, num_units=self.context_len,
                          nonlinearity=softmax, name=id_tag + 'scores')

    return l_scores, [l_in] + context_inputs
def inception_module(l_in, num_1x1, num_3x1_proj, reduce_3x1, num_3x1, reduce_5x1, num_5x1,
                     batch_norm=False, gain=1.0, bias=0.1, nonlinearity=rectify):
    """
    Inception module for sequences.

    Builds up to four parallel branches and concatenates them along the
    feature axis: a 1x1 projection, a (reduced) 3x1 conv, a (reduced)
    "5x1" branch implemented as two stacked 3x1 convs, and a 3x1 max-pool
    followed by a 1x1 projection.  A branch is skipped when its filter
    count is <= 0; the reduce step is skipped when its count is <= 0.

    :param l_in: input layer.
    :param num_1x1: filters in the 1x1 branch (0 disables the branch).
    :param num_3x1_proj: filters in the pool-projection branch.
    :param reduce_3x1: filters in the 1x1 reduction before the 3x1 conv.
    :param num_3x1: filters in the 3x1 branch.
    :param reduce_5x1: filters in the 1x1 reduction before the 5x1 branch.
    :param num_5x1: filters in (each conv of) the 5x1 branch.
    :param batch_norm: passed to BatchNormalizeLayer after each conv.
    :param gain: NOTE(review): currently unused — kept for interface
        compatibility; confirm whether it was meant to feed the W init.
    :param bias: NOTE(review): currently unused — all convs use b=None.
    :param nonlinearity: applied inside BatchNormalizeLayer wrappers.
    :return: ConcatLayer of the enabled branches (feature axis 1).
    """
    out_layers = []

    # 1x1
    if num_1x1 > 0:
        l_1x1 = NINLayer(l_in, num_units=num_1x1, W=lasagne.init.GlorotUniform(),
                         b=None, nonlinearity=None, name='inception_1x1')
        l_1x1_bn = BatchNormalizeLayer(l_1x1, batch_norm, nonlinearity)
        out_layers.append(l_1x1_bn)

    # 3x1
    if num_3x1 > 0:
        if reduce_3x1 > 0:
            # 1x1 bottleneck before the 3x1 conv.
            l_reduce_3x1 = NINLayer(l_in, num_units=reduce_3x1,
                                    W=lasagne.init.GlorotUniform(), b=None,
                                    nonlinearity=None, name='inception_reduce_3x1')
            l_reduce_3x1_bn = BatchNormalizeLayer(l_reduce_3x1, batch_norm, nonlinearity)
        else:
            l_reduce_3x1_bn = l_in
        l_3x1 = Conv2DLayer(l_reduce_3x1_bn, num_filters=num_3x1, filter_size=(3, 1),
                            pad="same", W=lasagne.init.GlorotUniform(), b=None,
                            nonlinearity=None, name='inception_3x1')
        l_3x1_bn = BatchNormalizeLayer(l_3x1, batch_norm, nonlinearity)
        out_layers.append(l_3x1_bn)

    # 5x1 (factored into two stacked 3x1 convs)
    if num_5x1 > 0:
        if reduce_5x1 > 0:
            # 1x1 bottleneck before the stacked convs.
            l_reduce_5x1 = NINLayer(l_in, num_units=reduce_5x1,
                                    W=lasagne.init.GlorotUniform(), b=None,
                                    nonlinearity=None, name='inception_reduce_5x1')
            l_reduce_5x1_bn = BatchNormalizeLayer(l_reduce_5x1, batch_norm, nonlinearity)
        else:
            l_reduce_5x1_bn = l_in
        l_5x1 = Conv2DLayer(l_reduce_5x1_bn, num_filters=num_5x1, filter_size=(3, 1),
                            pad="same", W=lasagne.init.GlorotUniform(), b=None,
                            nonlinearity=None, name='inception_5x1/1')
        l_5x1_bn = BatchNormalizeLayer(l_5x1, batch_norm, nonlinearity)
        l_5x1 = Conv2DLayer(l_5x1_bn, num_filters=num_5x1, filter_size=(3, 1),
                            pad="same", W=lasagne.init.GlorotUniform(), b=None,
                            nonlinearity=None, name='inception_5x1/2')
        l_5x1_bn = BatchNormalizeLayer(l_5x1, batch_norm, nonlinearity)
        out_layers.append(l_5x1_bn)

    # Pool + projection branch.
    if num_3x1_proj > 0:
        l_3x1_pool = MaxPool2DLayer(l_in, pool_size=(3, 1), stride=(1, 1),
                                    pad=(1, 0), name='inception_pool')
        l_3x1_proj = NINLayer(l_3x1_pool, num_units=num_3x1_proj, b=None,
                              nonlinearity=None, name='inception_pool_proj')
        l_3x1_proj_bn = BatchNormalizeLayer(l_3x1_proj, batch_norm, nonlinearity)
        out_layers.append(l_3x1_proj_bn)

    # stack
    l_out = ConcatLayer(out_layers, axis=1, name='Inception module')
    return l_out