def build_pixelcnn_block(incoming, i):
    """Build a residual block of three stacked convolutions.

    The block consists of a 1x1 convolution, a 3x3 ``'same'`` convolution
    whose weights are multiplied by ``get_mask(f_shape, 'B')``, and a second
    1x1 convolution, followed by an element-wise sum with ``incoming``.

    Parameters
    ----------
    incoming : layer
        Input layer; ``incoming.output_shape[1]`` is used as the filter count.
    i
        Block index, formatted into the keys of the returned dictionary.

    Returns
    -------
    OrderedDict
        The created layers in creation order; the last entry (``'skip_<i>'``)
        is the block output.
    """
    net = OrderedDict()
    nfilts = incoming.output_shape[1]  # nfilts = 2h

    # Layers are tracked in local variables instead of ``net.values()[-1]``:
    # dictionary views cannot be indexed on Python 3.
    conv_a = net['full_deconv_A_{}'.format(i)] = Conv2DLayer(
        incoming, num_filters=nfilts // 2, filter_size=1, name='conv_A')

    conv_b = net['full_deconv_B_{}'.format(i)] = Conv2DLayer(
        conv_a, num_filters=nfilts // 2, filter_size=3, pad='same',
        name='conv_B')
    # Rebind the layer's W attribute to the masked weights.
    f_shape = conv_b.W.get_value(borrow=True).shape
    conv_b.W *= get_mask(f_shape, 'B')

    conv_c = net['full_deconv_C_{}'.format(i)] = Conv2DLayer(
        conv_b, num_filters=nfilts, filter_size=1, name='conv_C')

    # residual skip connection
    net['skip_{}'.format(i)] = ElemwiseMergeLayer(
        [incoming, conv_c], merge_function=T.add, name='add_convs')
    return net
def get_gated(l_inp, n_units, i, name, latent=None):
    """Build a tanh/sigmoid gated activation on top of ``l_inp``.

    The first ``n_units // 2`` entries along axis 1 go through ``tanh``, the
    remaining entries through ``sigmoid``, and the two halves are multiplied
    element-wise.  If ``latent`` is an ``InputLayer`` it is first projected
    to ``n_units`` and added to ``l_inp``.

    Parameters
    ----------
    l_inp : layer
        Layer to be gated.
    n_units : int
        Size used for the latent projection and for splitting axis 1.
    i
        Block index, formatted into the dictionary keys.
    name : str
        Tag formatted into the gate keys and layer names.
    latent : InputLayer, optional
        Conditioning input with a 2-D or 4-D output shape.  Anything that is
        not an ``InputLayer`` is ignored.

    Returns
    -------
    tuple
        ``(gated_layer, net)`` where ``net`` is an OrderedDict of the created
        layers and ``gated_layer`` is its last entry.

    Raises
    ------
    NotImplementedError
        If ``latent`` is an ``InputLayer`` whose output shape is neither 2-D
        nor 4-D.
    """
    net = OrderedDict()

    if isinstance(latent, InputLayer):
        # NOTE(review): the latent keys below do not contain ``name``, so
        # merging two dictionaries built for the same ``i`` (e.g. 'vert' and
        # 'hor') overwrites these entries -- confirm whether that is intended.
        n_lat_dims = len(latent.output_shape)
        if n_lat_dims == 2:
            lat_proj = net['lat_proj_{}'.format(i)] = DenseLayer(
                latent, num_units=n_units, name='lat_proj')
            # dimshuffle to match the spatial dimensions
            lat_shfl = net['dimshfl_{}'.format(i)] = dimshuffle(
                lat_proj, pattern=(0, 1, 'x', 'x'), name='dimshfl')
            lat = repeat([l_inp, lat_shfl], name='repeat')
        elif n_lat_dims == 4:
            # y = f(h), using upsampling with deconv layer
            fsize = l_inp.output_shape[-1] - latent.output_shape[-1] + 1
            lat = net['lat_proj_{}'.format(i)] = deconv(
                latent, num_filters=n_units, filter_size=fsize,
                name='lat_proj')
        else:
            raise NotImplementedError
        l_inp = net['lat_merge_{}'.format(i)] = ElemwiseMergeLayer(
            [l_inp, lat], merge_function=T.add, name='lat_merge')

    half = n_units // 2
    l_tanh = net['tanh_{}_{}'.format(name, i)] = NonlinearityLayer(
        SliceLayer(l_inp, indices=slice(0, half), axis=1),
        nonlinearity=tanh, name='tanh_{}_slice'.format(name))
    l_sigmoid = net['sigmoid_{}_{}'.format(name, i)] = NonlinearityLayer(
        SliceLayer(l_inp, indices=slice(half, None), axis=1),
        nonlinearity=sigmoid, name='sigmoid_{}_slice'.format(name))

    # Keep the output in a local: ``net.values()[-1]`` cannot be indexed on
    # Python 3.
    gated = net['prod_merge_{}_{}'.format(name, i)] = ElemwiseMergeLayer(
        [l_tanh, l_sigmoid], T.mul, name='prod_merge_{}'.format(name))
    return gated, net
def layer(self):
    """Return an ElemwiseMergeLayer over two bare mocks, merged by maximum."""
    import theano.tensor as T
    from lasagne.layers.merge import ElemwiseMergeLayer

    incomings = [Mock() for _ in range(2)]
    return ElemwiseMergeLayer(incomings, merge_function=T.maximum)
def layer(self):
    """Return an ElemwiseMergeLayer over two mocks that carry output_shapes."""
    import theano.tensor as T
    from lasagne.layers.merge import ElemwiseMergeLayer

    # Both incoming mocks advertise the same ``output_shapes`` attribute.
    shapes = ((None, None), )
    first = Mock(output_shapes=shapes)
    second = Mock(output_shapes=shapes)
    incomings = (first, second)
    return ElemwiseMergeLayer(incomings, merge_function=T.maximum)
def build_pixelcnn_block(incoming_vert, incoming_hor, fsize, i, masked=None,
                         latent=None):
    """Build one gated block with a vertical and a horizontal stack.

    Each stack applies a convolution with ``2 * nfilts`` filters (either
    multiplied by ``get_mask(..., 'A')`` or padded and cropped), followed by
    ``get_gated``.  The vertical gate output is sent through a 1x1
    convolution and added to the horizontal convolution before the
    horizontal gate.  The horizontal gate output goes through a 1x1
    convolution and is added to ``incoming_hor``.

    Parameters
    ----------
    incoming_vert, incoming_hor : layer
        Inputs of the vertical and horizontal stacks; both must have the
        same size along axis 1.
    fsize : int
        Filter size ``n`` of the n x n (vertical) and 1 x n (horizontal)
        convolutions.
    i
        Block index, formatted into the dictionary keys.
    masked : optional
        If truthy, full-size ``'same'`` convolutions with masked weights are
        used; otherwise reduced filters with padding and cropping.
    latent : optional
        Conditioning input forwarded to ``get_gated``.

    Returns
    -------
    tuple
        ``(net, vert_output, hor_output)`` where ``net`` is an OrderedDict of
        the created layers.
    """
    net = OrderedDict()
    # input (batch_size, n_features, n_rows, n_columns)
    assert incoming_vert.output_shape[1] == incoming_hor.output_shape[1]
    nfilts = incoming_hor.output_shape[1]

    # Layers are tracked in local variables instead of ``net.values()[-1]``:
    # dictionary views cannot be indexed on Python 3, and "last value" does
    # not follow a key that is assigned a second time.

    # vertical nxn convolution part, fsize = (n,n)
    if masked:
        # either masked
        vert = net['conv_vert_{}'.format(i)] = Conv2DLayer(
            incoming_vert, num_filters=2 * nfilts, filter_size=fsize,
            pad='same', nonlinearity=linear, name='conv_vert')
        f_shape = vert.W.get_value(borrow=True).shape
        vert.W *= get_mask(f_shape, 'A')
    else:
        # or (n//2+1, n) convolution with padding and cropping
        conv_vert = net['conv_vert_{}'.format(i)] = Conv2DLayer(
            incoming_vert, num_filters=2 * nfilts,
            filter_size=(fsize // 2 + 1, fsize),
            pad=(fsize // 2 + 1, fsize // 2),
            nonlinearity=linear, name='conv_vert')
        # crop the trailing rows; the stop index is (-fsize) // 2 - 1
        # NOTE(review): 'slice_vert' / 'slice_hor' carry no block index, so
        # they collide when several blocks are merged into one dictionary.
        vert = net['slice_vert'] = SliceLayer(
            conv_vert, indices=slice(0, -fsize // 2 - 1), axis=2,
            name='slice_vert')

    # vertical gated processing
    l_out_vert, gated_vert = get_gated(vert, 2 * nfilts, i, 'vert', latent)
    net.update(gated_vert)

    # vertical skip connection to horizontal stack
    skip_vert2hor = net['full_conv_vert_{}'.format(i)] = Conv2DLayer(
        l_out_vert, num_filters=2 * nfilts, filter_size=1, pad='same',
        nonlinearity=linear, name='full_conv_vert')

    # horizontal 1xn convolution part, fsize = (1,n)
    if masked:
        hor = net['conv_hor_{}'.format(i)] = Conv2DLayer(
            incoming_hor, num_filters=2 * nfilts, filter_size=(1, fsize),
            pad='same', nonlinearity=linear, name='conv_hor')
        f_shape = hor.W.get_value(borrow=True).shape
        hor.W *= get_mask(f_shape, 'A')
    else:
        conv_hor = net['conv_hor_{}'.format(i)] = Conv2DLayer(
            incoming_hor, num_filters=2 * nfilts,
            filter_size=(1, fsize // 2 + 1), pad=(0, fsize // 2 + 1),
            nonlinearity=linear, name='conv_hor')
        # crop the trailing columns
        hor = net['slice_hor'] = SliceLayer(
            conv_hor, indices=slice(0, -fsize // 2 - 1), axis=3,
            name='slice_hor')

    # merge results of vertical and horizontal convolutions
    merged = net['add_vert2hor_{}'.format(i)] = ElemwiseMergeLayer(
        [skip_vert2hor, hor], T.add, name='add_vert2hor')

    # horizontal gated processing
    l_gated_hor, gated_hor = get_gated(merged, 2 * nfilts, i, 'hor', latent)
    net.update(gated_hor)

    # Horizontal 1x1 convolution.  It needs its own key: storing it under
    # 'conv_hor_<i>' would replace the 1xn convolution entry in place, and
    # the residual sum below must receive this layer, not the gate output.
    full_conv_hor = net['full_conv_hor_{}'.format(i)] = Conv2DLayer(
        l_gated_hor, num_filters=nfilts, filter_size=1, pad='same',
        nonlinearity=linear, name='full_conv_hor')

    # add horizontal skip connection
    l_out_hor = net['add_skip2hor_{}'.format(i)] = ElemwiseMergeLayer(
        [full_conv_hor, incoming_hor], T.add, name='add_skip2hor')

    return net, l_out_vert, l_out_hor  # net, vert output, hor output
def build_pixelrnn_block(incoming, i, connected=False, learn_init=True):
    """Build a residual block of two PixelLSTM layers between skew/unskew.

    The input is skewed, processed by two ``PixelLSTMLayer`` instances,
    unskewed, passed through a 1x1 convolution and added to ``incoming``.

    Parameters
    ----------
    incoming : layer
        Input layer; ``incoming.output_shape[1]`` is the number of filters of
        the final 1x1 convolution and twice the LSTM ``num_units``.
    i
        Block index, formatted into the dictionary keys.
    connected : bool
        If true, the second LSTM (``backwards=True``) is stacked on the first
        one.  Otherwise both LSTMs read the skewed input; the second output
        loses its last row, gets a zero row prepended, and is added to the
        first output.
    learn_init : bool
        Forwarded to both ``PixelLSTMLayer`` constructors.

    Returns
    -------
    OrderedDict
        The created layers in creation order; the last entry (``'skip_<i>'``)
        is the block output.
    """
    net = OrderedDict()
    num_units = incoming.output_shape[1] // 2

    # Layers are tracked in local variables instead of ``net.values()[-1]``:
    # dictionary views cannot be indexed on Python 3.
    skew_l = net['skew_{}'.format(i)] = SkewLayer(incoming, name='skew')

    if connected:
        # igul implementation
        rnn_l = net['rnn_{}'.format(i)] = PixelLSTMLayer(
            skew_l, num_units=num_units, learn_init=learn_init,
            mask_type='B', name='rnn_conn')
        rnn_out = net['bi_rnn_{}'.format(i)] = PixelLSTMLayer(
            rnn_l, num_units=num_units, learn_init=learn_init,
            mask_type='B', backwards=True, name='birnn_conn')
    else:
        # original paper says:
        # Given the two output maps, to prevent the layer from seeing future
        # pixels, the right output map is then shifted down by one row and
        # added to the left output map
        rnn_l = net['rnn_{}'.format(i)] = PixelLSTMLayer(
            skew_l, num_units=num_units, precompute_input=True,
            learn_init=learn_init, mask_type='B', name='rnn')
        # NOTE(review): unlike the connected branch, this second LSTM is
        # built without ``backwards=True`` -- confirm whether that is
        # intended for the "right" output map.
        birnn_l = net['bi_rnn_{}'.format(i)] = PixelLSTMLayer(
            skew_l, num_units=num_units, precompute_input=True,
            learn_init=learn_init, mask_type='B', name='birnn')
        # slice the last row
        sliced = net['slice_last_row'] = SliceLayer(
            birnn_l, indices=slice(0, -1), axis=2, name='slice_birnn')
        # pad first row with zeros
        padded = net['pad'] = pad(sliced, width=[(1, 0)], val=0,
                                  batch_ndim=2, name='pad_birnn')
        # add together
        rnn_out = net['rnn_out'] = ElemwiseMergeLayer(
            [rnn_l, padded], merge_function=T.add, name='add_rnns')

    unskewed = net['unskew_{}'.format(i)] = UnSkewLayer(rnn_out, name='skew')

    # 1x1 upsampling by full convolution
    nfilts = incoming.output_shape[1]
    full_conv = net['full_deconv_{}'.format(i)] = Conv2DLayer(
        unskewed, num_filters=nfilts, filter_size=1, name='full_conv')

    # residual skip connection
    net['skip_{}'.format(i)] = ElemwiseMergeLayer(
        [incoming, full_conv], merge_function=T.add, name='add_rnns')
    return net
def build_nmt_encoder_decoder(dim_word=1, n_embd=100, n_units=500, n_proj=200,
                              state=None, rev_state=None, context_type=None,
                              attention=False, drop_p=None):
    """Assemble the layer dictionary of an RNN encoder-decoder.

    The encoder embeds the ``'input'`` tokens and runs ``get_rnn_unit`` over
    them.  The encoder output is reduced to ``'context2init'`` and projected
    to the decoder's initial state.  The decoder embeds the ``'target'``
    tokens and runs a second ``get_rnn_unit``.  Decoder output, target
    embedding and context are each projected to ``n_proj`` units, merged
    with ``tanh_add`` and passed to ``get_output_unit``.

    Parameters
    ----------
    dim_word : int
        ``input_size`` of both embeddings; also passed to ``get_output_unit``.
    n_embd : int
        Embedding size.
    n_units : int
        Number of RNN units.
    n_proj : int
        Size of the three projections that are merged before the output.
    state : layer, optional
        Decoder initial state.  If given, the context is read from a new
        ``'context_input'`` input layer instead of the encoder.
    rev_state : optional
        Accepted for interface compatibility; not used by this function.
    context_type : {'last', 'mean'}
        How the encoder output is reduced to ``'context2init'``.
    attention : bool
        Forwarded to the decoder ``get_rnn_unit``.  If true, the context is
        an ``ExpressionLayer`` that reads the ``ctx`` attribute of the last
        decoder layer.
    drop_p : float, optional
        Dropout probability; ``None`` disables all dropout layers.

    Returns
    -------
    OrderedDict
        All created layers keyed by name.

    Raises
    ------
    ValueError
        If ``context_type`` is neither ``'last'`` nor ``'mean'``; without a
        ``'context2init'`` layer the decoder initial state cannot be built.
    """
    if context_type not in ('last', 'mean'):
        raise ValueError(
            "context_type must be 'last' or 'mean', got {!r}".format(
                context_type))

    enc = OrderedDict()
    tokens = enc['input'] = InputLayer((None, None), name='input')
    enc_mask = enc['mask'] = InputLayer((None, None), name='mask')
    enc_rev_mask = enc['rev_mask'] = InputLayer((None, None), name='rev_mask')
    # Embed the token input explicitly: the most recently added entry at
    # this point is the reverse-mask layer, not the tokens.
    input_emb = enc['input_emb'] = EmbeddingLayer(
        tokens, input_size=dim_word, output_size=n_embd, name='input_emb')

    ### ENCODER PART ###
    # rnn encoder unit
    hid_init = Constant(0.)
    hid_init_rev = Constant(0.)
    encoder_unit = get_rnn_unit(input_emb, enc_mask, enc_rev_mask,
                                hid_init, hid_init_rev, n_units,
                                prefix='encoder_')
    enc.update(encoder_unit)

    # Encoder output = most recently added layer.  ``next(reversed(...))``
    # is used because dictionary views cannot be indexed on Python 3.
    context = next(reversed(enc.values()))
    if context_type == 'last':
        context2init = SliceLayer(context, indices=-1, axis=1,
                                  name='last_encoder_context')
    else:  # 'mean'
        context2init = ExpressionLayer(context, mean_over_1_axis,
                                       output_shape='auto',
                                       name='mean_encoder_context')
    enc['context2init'] = context2init

    ### DECODER PART ###
    W_init2proj, b_init2proj = GlorotUniform(), Constant(0.)
    enc['init_state'] = DenseLayer(context2init, num_units=n_units,
                                   W=W_init2proj, b=b_init2proj,
                                   nonlinearity=tanh,
                                   name='decoder_init_state')
    if state is None:
        init_state = enc['init_state']
        if not attention:
            # without attention the decoder gets the reduced context;
            # with attention it keeps the full encoder output
            context = context2init
    else:
        init_state = state
        context = enc['context_input'] = InputLayer(
            (None, n_units), name='ctx_input')  # (batch_size, nfeats)

    # (batch_size, valid ntsteps)
    target = enc['target'] = InputLayer((None, None), name='target')
    dec_mask = enc['target_mask'] = InputLayer((None, None),
                                               name='target_mask')
    # Embed the target tokens explicitly (the most recently added entry is
    # the target mask).
    target_emb = enc['target_emb'] = EmbeddingLayer(
        target, input_size=dim_word, output_size=n_embd, name='target_emb')
    prevdim = n_embd
    prev2rnn = target_emb
    decoder_unit = get_rnn_unit(prev2rnn, dec_mask, None, init_state, None,
                                n_units, prefix='decoder_', context=context,
                                attention=attention)
    enc.update(decoder_unit)

    if attention:
        ctxs = next(reversed(enc.values()))
        ctxs_shape = ctxs.output_shape

        def get_ctx(x):
            # the input expression is ignored; the value is read from the
            # ``ctx`` attribute of the wrapped layer
            return ctxs.ctx

        context = enc['context'] = ExpressionLayer(
            ctxs, function=get_ctx, output_shape=ctxs_shape, name='context')

    # reshape for feed-forward layer
    # 2D shapes of (batch_size * num_steps, num_units/num_feats)
    # NOTE(review): with attention enabled the most recently added layer is
    # the 'context' ExpressionLayer, so the "rnn" projection below is fed by
    # it rather than by the decoder output -- confirm this is intended.
    enc['rnn2proj'] = rnn2proj = ReshapeLayer(
        next(reversed(enc.values())), (-1, n_units), name='flatten_rnn2proj')
    enc['prev2proj'] = prev2proj = ReshapeLayer(
        prev2rnn, (-1, prevdim), name='flatten_prev')
    # Branch on ``attention`` rather than on the context's class: the
    # non-attention 'mean' context is an ExpressionLayer too, but it is not
    # per-step and ``ctxs_shape`` only exists in the attention case.
    if attention:
        ctx2proj = enc['ctx2proj'] = ReshapeLayer(
            context, (-1, ctxs_shape[-1]), name='flatten_ctxs')
    else:
        ctx2proj = context

    # parameter initialisers of the three projections
    W_rnn2proj, b_rnn2proj = GlorotUniform(), Constant(0.)
    W_prev2proj, b_prev2proj = GlorotUniform(), Constant(0.)
    W_ctx2proj, b_ctx2proj = GlorotUniform(), Constant(0.)

    # perturb the projection inputs; DropoutLayer takes the probability as
    # ``p``
    if drop_p is not None:
        rnn2proj = enc['noise_rnn2proj'] = DropoutLayer(
            rnn2proj, p=drop_p, name='noise_rnn2proj')
        prev2proj = enc['drop_prev2proj'] = DropoutLayer(
            prev2proj, p=drop_p, name='drop_prev2proj')
        ctx2proj = enc['noise_ctx2proj'] = DropoutLayer(
            ctx2proj, p=drop_p, name='noise_ctx2proj')

    # project rnn
    rnn_proj = enc['rnn_proj'] = DenseLayer(
        rnn2proj, num_units=n_proj, nonlinearity=linear,
        W=W_rnn2proj, b=b_rnn2proj, name='rnn_proj')
    # project raw targets
    prev_proj = enc['prev_proj'] = DenseLayer(
        prev2proj, num_units=n_proj, nonlinearity=linear,
        W=W_prev2proj, b=b_prev2proj, name='prev_proj')
    # project context
    ctx_proj = enc['ctx_proj'] = DenseLayer(
        ctx2proj, num_units=n_proj, nonlinearity=linear,
        W=W_ctx2proj, b=b_ctx2proj, name='ctx_proj')

    # reshape back for merging
    n_batch = tokens.input_var.shape[0]
    rnn2merge = enc['rnn2merge'] = ReshapeLayer(
        rnn_proj, (n_batch, -1, n_proj), name='reshaped_rnn2proj')
    prev2merge = enc['prev2merge'] = ReshapeLayer(
        prev_proj, (n_batch, -1, n_proj), name='reshaped_prev')
    if attention:
        ctx2merge = ReshapeLayer(ctx_proj, (n_batch, -1, n_proj),
                                 name='reshaped_prev')
    else:
        # insert a broadcastable step axis into the 2-D context projection
        ctx2merge = enc['ctx2merge'] = DimshuffleLayer(
            ctx_proj, pattern=(0, 'x', 1), name='reshaped_context')

    # combine projections into shape (batch_size, n_steps, n_proj)
    proj_merge = enc['proj_merge'] = ElemwiseMergeLayer(
        [rnn2merge, prev2merge, ctx2merge], merge_function=tanh_add,
        name='proj_merge')

    # reshape for output regression projection
    last = enc['merge2proj'] = ReshapeLayer(
        proj_merge, (-1, n_proj), name='flatten_proj_merge')

    # perturb concatenated regressors by noise
    if drop_p is not None:
        last = enc['noise_output'] = DropoutLayer(
            last, p=drop_p, name='noise_output')

    # regress on combined (perturbed) projections
    out = get_output_unit(target, last, dim_word)
    enc.update(out)  # update graph
    return enc
def build_wavenet(n_channels, seq_length, specs, out_dim=256, out_fn=softmax,
                  latent=None):
    """Build the network: causal convolution, stacked blocks, output head.

    Parameters
    ----------
    n_channels : int
        Size of the last axis of the ``(batch, seq_length, n_channels)``
        input.
    seq_length : int or None
        Size of the second axis of the input.
    specs : sequence
        The first entry is ``(nfilts, fsize)`` for the initial causal
        convolution.  Every remaining entry is forwarded unchanged to
        ``build_wavenet_block``, one block per entry.  The sequence is not
        modified.
    out_dim : int
        Number of filters of the output convolution.
    out_fn : callable
        Nonlinearity of the output convolution.
    latent : optional
        Conditioning layer; stored under ``'latent'`` when truthy and
        forwarded to every block.

    Returns
    -------
    tuple
        ``(net, output_layer)`` where ``net`` is an OrderedDict of the created
        layers and ``output_layer`` is its last entry.

    Raises
    ------
    IndexError
        If ``specs`` is empty.
    """
    net = OrderedDict()
    if latent:
        net['latent'] = latent

    l_input = net['input'] = InputLayer((None, seq_length, n_channels),
                                        name='input')
    input_shape = l_input.input_var.shape
    shuffled = net['input_dimshfl'] = dimshuffle(
        l_input, pattern=(0, 2, 'x', 1), name='input_dimshfl')

    # Work on a copy so that the caller's list is left untouched.
    specs = list(specs)
    (nfilts, fsize), block_specs = specs[0], specs[1:]

    # Left-pad by fsize - 1 after the convolution so the length is kept.
    # ``l_inp`` starts at this layer so the skip merge below is also defined
    # when there are no block specs.
    l_inp = net['causal_conv_0'] = pad(
        conv(shuffled, num_filters=nfilts, filter_size=(1, fsize)),
        width=[(fsize - 1, 0)], val=0, batch_ndim=3, name='causal_conv')

    skips = []
    # Build one block per remaining spec.  Popping from the list that is
    # being iterated would stop after roughly half of the entries.
    for i, spec in enumerate(block_specs, 1):
        # Each block reads the most recently added layer.
        # ``next(reversed(...))`` is used because dictionary views cannot
        # be indexed on Python 3.
        l_inp = next(reversed(net.values()))
        skip, wavenet_block = build_wavenet_block(l_inp, i, spec,
                                                  latent=latent)
        skips.append(skip)
        net.update(wavenet_block)

    # sum of the last block's input and all skip outputs
    skip_merge = net['skip_merge'] = NonlinearityLayer(
        ElemwiseMergeLayer([l_inp] + skips, T.add),
        nonlinearity=rectify, name='skip_merge')
    pre_out = net['pre_out'] = conv(skip_merge, num_filters=nfilts,
                                    filter_size=1, nonlinearity=rectify,
                                    name='pre_out')
    # num_filters = output_dim
    output = net['output'] = conv(pre_out, num_filters=out_dim,
                                  filter_size=1, nonlinearity=out_fn,
                                  name='output')
    out_shfl = net['output_dimshfl'] = dimshuffle(
        output, pattern=(0, 3, 1, 2), name='output_dimshfl')

    output_shape = (input_shape[0], input_shape[1], out_dim)
    l_out = net['output_reshape'] = reshape(out_shfl, shape=output_shape,
                                            name='output_reshape')
    return net, l_out
def build_wavenet_block(incoming, i, specs, latent=None):
    """Build one gated residual block around a dilated causal convolution.

    The dilated convolution output is optionally summed with a projection of
    ``latent``, split along axis 1 into a tanh half and a sigmoid half, and
    the halves are multiplied.  A 1x1 convolution of the product is the skip
    output; the skip output plus ``incoming`` is the residual output.

    Parameters
    ----------
    incoming : layer
        Block input; ``incoming.output_shape[1]`` is used as the filter
        count.
    i
        Block index, formatted into the dictionary keys.
    specs : tuple
        ``(fsize, dil)``: filter size and dilation along the last axis.
    latent : InputLayer, optional
        Conditioning input with a 2-D or 4-D output shape.  Anything that is
        not an ``InputLayer`` is ignored.

    Returns
    -------
    tuple
        ``(skip, net)`` where ``skip`` is the 1x1 convolution layer and
        ``net`` is an OrderedDict of the created layers whose last entry
        (``'res_out_<i>'``) is the residual output.

    Raises
    ------
    NotImplementedError
        If ``latent`` is an ``InputLayer`` whose output shape is neither 2-D
        nor 4-D.
    """
    # input (batch_size, num_units, 1, seq_len)
    net = OrderedDict()

    # dilated causal convolution
    nfilts = incoming.output_shape[1]
    fsize, dil = specs
    # Left-pad by (fsize - 1) * dil, which equals ``dil`` for fsize == 2.
    # Assumes ``dilate`` is an unpadded convolution that shortens the last
    # axis by that amount -- TODO confirm.
    l_inp = net['dil_causal_conv_{}'.format(i)] = pad(
        dilate(incoming, num_filters=nfilts, filter_size=(1, fsize),
               dilation=(1, dil)),
        width=[((fsize - 1) * dil, 0)], val=0, batch_ndim=3,
        name='dil_causal_conv')

    # Layers are tracked in local variables instead of ``net.values()[-1]``:
    # dictionary views cannot be indexed on Python 3.
    if isinstance(latent, InputLayer):
        n_lat_dims = len(latent.output_shape)
        if n_lat_dims == 2:
            lat_proj = net['lat_proj_{}'.format(i)] = DenseLayer(
                latent, num_units=nfilts, name='lat_proj')
            # dimshuffle to match the spatial dimensions
            lat_shfl = net['dimshfl_{}'.format(i)] = dimshuffle(
                lat_proj, pattern=(0, 1, 'x', 'x'), name='dimshfl')
            lat = repeat([l_inp, lat_shfl], name='repeat')
        elif n_lat_dims == 4:
            # y = f(h), using upsampling with deconv layer
            lat_fsize = l_inp.output_shape[-1] - latent.output_shape[-1] + 1
            lat = net['lat_proj_{}'.format(i)] = deconv(
                latent, num_filters=nfilts, filter_size=(1, lat_fsize),
                name='lat_proj')
        else:
            raise NotImplementedError
        l_inp = net['lat_merge_{}'.format(i)] = ElemwiseMergeLayer(
            [l_inp, lat], merge_function=T.add, name='lat_merge')

    half = nfilts // 2
    # tanh gate
    tanh_sl = net['tanh_slice_{}'.format(i)] = NonlinearityLayer(
        SliceLayer(l_inp, indices=slice(0, half), axis=1),
        nonlinearity=tanh, name='tanh_slice')
    # sigmoid gate
    sigmoid_sl = net['sigmoid_slice_{}'.format(i)] = NonlinearityLayer(
        SliceLayer(l_inp, indices=slice(half, None), axis=1),
        nonlinearity=sigmoid, name='sigmoid_slice')
    # elementwise merging by product
    gated = net['prod_merge_{}'.format(i)] = ElemwiseMergeLayer(
        [tanh_sl, sigmoid_sl], T.mul, name='prod_merge')

    # skip connection
    skip = net['full_conv_{}'.format(i)] = conv(
        gated, num_filters=nfilts, filter_size=1, nonlinearity=linear,
        name='full_conv')
    # elementwise merging by addition
    net['res_out_{}'.format(i)] = ElemwiseMergeLayer(
        [incoming, skip], T.add, name='res_out')
    return skip, net