Example #1
def build_pixelcnn_block(incoming, i):
    net = OrderedDict()

    nfilts = incoming.output_shape[1]  # nfilts = 2h
    net['full_deconv_A_{}'.format(i)] = Conv2DLayer(incoming,
                                                    num_filters=nfilts // 2,
                                                    filter_size=1,
                                                    name='conv_A')

    net['full_deconv_B_{}'.format(i)] = Conv2DLayer(net.values()[-1],
                                                    num_filters=nfilts // 2,
                                                    filter_size=3,
                                                    pad='same',
                                                    name='conv_B')
    f_shape = net.values()[-1].W.get_value(borrow=True).shape
    net.values()[-1].W *= get_mask(f_shape, 'B')

    net['full_deconv_C_{}'.format(i)] = Conv2DLayer(net.values()[-1],
                                                    num_filters=nfilts,
                                                    filter_size=1,
                                                    name='conv_C')

    # residual skip connection
    net['skip_{}'.format(i)] = ElemwiseMergeLayer(
        [incoming, net.values()[-1]], merge_function=T.add, name='add_convs')

    return net
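The `W *= get_mask(...)` lines above work by rebinding the layer's `W` attribute to a symbolic product, so the mask becomes part of the graph and is re-applied on every forward pass. A minimal self-contained sketch of that idiom, with a trivial all-ones array standing in for the project's `get_mask`:

import numpy as np
import theano
from lasagne.layers import InputLayer, Conv2DLayer, get_output

l_in = InputLayer((None, 16, 8, 8))
l_conv = Conv2DLayer(l_in, num_filters=16, filter_size=3, pad='same')

# hypothetical stand-in for the project's get_mask: an all-ones (no-op) mask
mask = np.ones(l_conv.W.get_value(borrow=True).shape,
               dtype=theano.config.floatX)

# rebind W to the symbolic product; the mask is applied on every forward pass
l_conv.W *= mask
y = get_output(l_conv)

Note that the original shared variable is still registered as the layer's parameter, so only the forward expression changes.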
Example #2
def get_gated(l_inp, n_units, i, name, latent=None):
    net = OrderedDict()

    if isinstance(latent, InputLayer):
        if len(latent.output_shape) == 2:
            net['lat_proj_{}'.format(i)] = DenseLayer(latent,
                                                      num_units=n_units,
                                                      name='lat_proj')

            # dimshuffle to match the spatial dimensions
            net['dimshfl_{}'.format(i)] = dimshuffle(net.values()[-1],
                                                     pattern=(0, 1, 'x', 'x'),
                                                     name='dimshfl')
            lat = repeat([l_inp, net.values()[-1]], name='repeat')
        elif len(latent.output_shape) == 4:
            # y = f(h), using upsampling with deconv layer
            fsize = l_inp.output_shape[-1] - latent.output_shape[-1] + 1
            lat = net['lat_proj_{}'.format(i)] = deconv(latent,
                                                        num_filters=n_units,
                                                        filter_size=fsize,
                                                        name='lat_proj')
        else:
            raise NotImplementedError
        l_inp = net['lat_merge_{}'.format(i)] = ElemwiseMergeLayer(
            [l_inp, lat], merge_function=T.add, name='lat_merge')

    l_tanh = net['tanh_{}_{}'.format(name, i)] = NonlinearityLayer(
        SliceLayer(l_inp, indices=slice(0, n_units // 2), axis=1),
        nonlinearity=tanh,
        name='tanh_{}_slice'.format(name))

    l_sigmoid = net['sigmoid_{}_{}'.format(name, i)] = NonlinearityLayer(
        SliceLayer(l_inp, indices=slice(n_units // 2, None), axis=1),
        nonlinearity=sigmoid,
        name='sigmoid_{}_slice'.format(name))

    net['prod_merge_{}_{}'.format(name, i)] = ElemwiseMergeLayer(
        [l_tanh, l_sigmoid], T.mul, name='prod_merge_{}'.format(name))
    return net.values()[-1], net
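Stripped of the latent conditioning, the gate itself is just a tanh half and a sigmoid half of the feature maps multiplied element-wise. A minimal sketch (layer sizes are arbitrary assumptions):

import theano.tensor as T
from lasagne.layers import (InputLayer, SliceLayer, NonlinearityLayer,
                            ElemwiseMergeLayer)
from lasagne.nonlinearities import tanh, sigmoid

l_in = InputLayer((None, 32, 8, 8))  # 2k feature maps in
l_t = NonlinearityLayer(SliceLayer(l_in, slice(0, 16), axis=1), tanh)
l_s = NonlinearityLayer(SliceLayer(l_in, slice(16, None), axis=1), sigmoid)
l_gate = ElemwiseMergeLayer([l_t, l_s], merge_function=T.mul)  # k maps out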
Example #3
def layer(self):
    import theano.tensor as T
    from lasagne.layers.merge import ElemwiseMergeLayer
    from mock import Mock
    return ElemwiseMergeLayer([Mock(), Mock()], merge_function=T.maximum)
Example #4
def layer(self):
    import theano.tensor as T
    from lasagne.layers.merge import ElemwiseMergeLayer
    from mock import Mock
    l1 = Mock(output_shapes=((None, None), ))
    l2 = Mock(output_shapes=((None, None), ))
    return ElemwiseMergeLayer((l1, l2), merge_function=T.maximum)
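For reference, the behaviour these test fixtures exercise, compiled end-to-end (a hedged sketch; the shapes are arbitrary):

import numpy as np
import theano
import theano.tensor as T
from lasagne.layers import InputLayer, ElemwiseMergeLayer, get_output

l1, l2 = InputLayer((None, 4)), InputLayer((None, 4))
l_max = ElemwiseMergeLayer([l1, l2], merge_function=T.maximum)
f = theano.function([l1.input_var, l2.input_var], get_output(l_max))

a = np.random.rand(2, 4).astype(theano.config.floatX)
b = np.random.rand(2, 4).astype(theano.config.floatX)
assert np.allclose(f(a, b), np.maximum(a, b))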
Example #5
def build_pixelcnn_block(incoming_vert,
                         incoming_hor,
                         fsize,
                         i,
                         masked=None,
                         latent=None):
    net = OrderedDict()
    # input (batch_size, n_features, n_rows, n_columns), n_features = p
    assert incoming_vert.output_shape[1] == incoming_hor.output_shape[1]
    nfilts = incoming_hor.output_shape[1]  # 2p

    # vertical nxn convolution part, fsize = (n,n)
    if masked:
        # either masked
        net['conv_vert_{}'.format(i)] = Conv2DLayer(incoming_vert,
                                                    num_filters=2 * nfilts,
                                                    filter_size=fsize,
                                                    pad='same',
                                                    nonlinearity=linear,
                                                    name='conv_vert')  # 2p
        f_shape = net.values()[-1].W.get_value(borrow=True).shape
        net.values()[-1].W *= get_mask(f_shape, 'A')
    else:
        # or an (n//2+1, n) convolution with padding and cropping
        net['conv_vert_{}'.format(i)] = Conv2DLayer(
            incoming_vert,
            num_filters=2 * nfilts,
            filter_size=(fsize // 2 + 1, fsize),
            pad=(fsize // 2 + 1, fsize // 2),
            nonlinearity=linear,
            name='conv_vert')  # 2p

        # crop; key is indexed by i so it stays unique when blocks are stacked
        net['slice_vert_{}'.format(i)] = SliceLayer(
            net.values()[-1],
            indices=slice(0, -fsize // 2 - 1),
            axis=2,
            name='slice_vert')

    # vertical gated processing
    l_out_vert, gated_vert = get_gated(net.values()[-1], 2 * nfilts, i, 'vert',
                                       latent)
    net.update(gated_vert)  # p

    # vertical skip connection to horizontal stack
    net['full_conv_vert_{}'.format(i)] = Conv2DLayer(l_out_vert,
                                                     num_filters=2 * nfilts,
                                                     filter_size=1,
                                                     pad='same',
                                                     nonlinearity=linear,
                                                     name='full_conv_vert')
    skip_vert2hor = net.values()[-1]

    # horizontal 1xn convolution part, fsize = (1,n)
    if masked:
        net['conv_hor_{}'.format(i)] = Conv2DLayer(incoming_hor,
                                                   num_filters=2 * nfilts,
                                                   filter_size=(1, fsize),
                                                   pad='same',
                                                   nonlinearity=linear,
                                                   name='conv_hor')  # 2p
        f_shape = net.values()[-1].W.get_value(borrow=True).shape
        net.values()[-1].W *= get_mask(f_shape, 'A')
    else:
        net['conv_hor_{}'.format(i)] = Conv2DLayer(incoming_hor,
                                                   num_filters=2 * nfilts,
                                                   filter_size=(1, fsize // 2 +
                                                                1),
                                                   pad=(0, fsize // 2 + 1),
                                                   nonlinearity=linear,
                                                   name='conv_hor')  # 2p

        # crop; key is indexed by i so it stays unique when blocks are stacked
        net['slice_hor_{}'.format(i)] = SliceLayer(
            net.values()[-1],
            indices=slice(0, -fsize // 2 - 1),
            axis=3,
            name='slice_hor')

    # merge results of vertical and horizontal convolutions
    net['add_vert2hor_{}'.format(i)] = ElemwiseMergeLayer(
        [skip_vert2hor, net.values()[-1]], T.add, name='add_vert2hor')  # 2p

    # horizontal gated processing
    l_gated_hor, gated_hor = get_gated(net.values()[-1], 2 * nfilts, i, 'hor',
                                       latent)
    net.update(gated_hor)  # p

    # horizontal 1x1 full convolution; this needs a distinct key, since
    # reassigning 'conv_hor_{}' keeps its old OrderedDict position and
    # net.values()[-1] below would then point at the wrong layer
    net['full_conv_hor_{}'.format(i)] = Conv2DLayer(l_gated_hor,
                                                    num_filters=nfilts,
                                                    filter_size=1,
                                                    pad='same',
                                                    nonlinearity=linear,
                                                    name='full_conv_hor')

    # add horizontal skip connection
    net['add_skip2hor_{}'.format(i)] = ElemwiseMergeLayer(
        [net.values()[-1], incoming_hor], T.add, name='add_skip2hor')

    return net, l_out_vert, net.values()[-1]  # net, vert output, hor output
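A hedged usage sketch for stacking these blocks into a full network (`l_input` and `n_blocks` are hypothetical; the first block is masked, the rest rely on the pad-and-crop construction):

from collections import OrderedDict

net = OrderedDict()
l_vert = l_hor = l_input  # hypothetical input stack, shape (None, p, H, W)
for i in range(n_blocks):  # n_blocks: hypothetical depth
    block, l_vert, l_hor = build_pixelcnn_block(l_vert, l_hor,
                                                fsize=3, i=i,
                                                masked=(i == 0),
                                                latent=None)
    net.update(block)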
Example #6
def build_pixelrnn_block(incoming, i, connected=False, learn_init=True):
    net = OrderedDict()
    num_units = incoming.output_shape[1] // 2

    net['skew_{}'.format(i)] = SkewLayer(incoming, name='skew')
    if connected:
        # igul implementation
        net['rnn_{}'.format(i)] = PixelLSTMLayer(net.values()[-1],
                                                 num_units=num_units,
                                                 learn_init=learn_init,
                                                 mask_type='B',
                                                 name='rnn_conn')
        net['bi_rnn_{}'.format(i)] = PixelLSTMLayer(net.values()[-1],
                                                    num_units=num_units,
                                                    learn_init=learn_init,
                                                    mask_type='B',
                                                    backwards=True,
                                                    name='birnn_conn')
    else:
        # original paper says:
        # Given the two output maps, to prevent the layer from seeing future
        # pixels, the right output map is then shifted down by one row and
        # added to the left output map
        skew_l = net.values()[-1]
        rnn_l = net['rnn_{}'.format(i)] = PixelLSTMLayer(skew_l,
                                                         num_units=num_units,
                                                         precompute_input=True,
                                                         learn_init=learn_init,
                                                         mask_type='B',
                                                         name='rnn')
        # W = net.values()[-1].W_in_to_ingate
        # f_shape = np.array(W.get_value(borrow=True).shape)
        # f_shape[1] *= 4
        # W *= get_mask(tuple(f_shape), 'B')

        net['bi_rnn_{}'.format(i)] = PixelLSTMLayer(skew_l,
                                                    num_units=num_units,
                                                    precompute_input=True,
                                                    learn_init=learn_init,
                                                    mask_type='B',
                                                    name='birnn')
        # W = net.values()[-1].W_in_to_ingate
        # f_shape = np.array(W.get_value(borrow=True).shape)
        # f_shape[1] *= 4
        # W *= get_mask(tuple(f_shape), 'B')

        # slice off the last row
        net['slice_last_row_{}'.format(i)] = SliceLayer(net.values()[-1],
                                                        indices=slice(0, -1),
                                                        axis=2,
                                                        name='slice_birnn')

        # pad the first row with zeros
        net['pad_{}'.format(i)] = pad(net.values()[-1],
                                      width=[(1, 0)],
                                      val=0,
                                      batch_ndim=2,
                                      name='pad_birnn')

        # add the forward map and the shifted backward map together
        net['rnn_out_{}'.format(i)] = ElemwiseMergeLayer(
            [rnn_l, net.values()[-1]], merge_function=T.add, name='add_rnns')

    net['unskew_{}'.format(i)] = UnSkewLayer(net.values()[-1], name='unskew')

    # 1x1 full convolution to restore the input channel count
    nfilts = incoming.output_shape[1]
    net['full_deconv_{}'.format(i)] = Conv2DLayer(net.values()[-1],
                                                  num_filters=nfilts,
                                                  filter_size=1,
                                                  name='full_conv')

    # residual skip connection
    net['skip_{}'.format(i)] = ElemwiseMergeLayer(
        [incoming, net.values()[-1]], merge_function=T.add, name='add_skip')

    return net
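The "shift the right output map down by one row" step quoted above can be checked in isolation: dropping the last row and zero-padding one row at the top preserves the shape while ensuring row r only receives information from rows above it. A sketch with arbitrary shapes:

from lasagne.layers import InputLayer, SliceLayer, PadLayer

l_in = InputLayer((None, 8, 28, 28))
l_crop = SliceLayer(l_in, indices=slice(0, -1), axis=2)          # drop last row
l_shift = PadLayer(l_crop, width=[(1, 0)], val=0, batch_ndim=2)  # zero row on top
assert l_shift.output_shape == (None, 8, 28, 28)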
Example #7
def build_nmt_encoder_decoder(dim_word=1,
                              n_embd=100,
                              n_units=500,
                              n_proj=200,
                              state=None,
                              rev_state=None,
                              context_type=None,
                              attention=False,
                              drop_p=None):
    enc = OrderedDict()
    enc['input'] = InputLayer((None, None), name='input')
    enc_mask = enc['mask'] = InputLayer((None, None), name='mask')
    enc_rev_mask = enc['rev_mask'] = InputLayer((None, None), name='rev_mask')

    # embed the source input (values()[-1] here would be 'rev_mask')
    enc['input_emb'] = EmbeddingLayer(enc['input'],
                                      input_size=dim_word,
                                      output_size=n_embd,
                                      name='input_emb')

    ### ENCODER PART ###
    # rnn encoder unit
    hid_init = Constant(0.)
    hid_init_rev = Constant(0.)
    encoder_unit = get_rnn_unit(enc.values()[-1],
                                enc_mask,
                                enc_rev_mask,
                                hid_init,
                                hid_init_rev,
                                n_units,
                                prefix='encoder_')
    enc.update(encoder_unit)

    # context layer = decoder's initial state of shape (batch_size, num_units)
    context = enc.values()[-1]  # net['context']
    if context_type == 'last':
        enc['context2init'] = SliceLayer(context,
                                         indices=-1,
                                         axis=1,
                                         name='last_encoder_context')
    elif context_type == 'mean':
        enc['context2init'] = ExpressionLayer(context,
                                              mean_over_1_axis,
                                              output_shape='auto',
                                              name='mean_encoder_context')
    else:
        # 'context2init' is required below, so fail early
        raise ValueError("context_type must be 'last' or 'mean'")

    ### DECODER PART ###
    W_init2proj, b_init2proj = GlorotUniform(), Constant(0.)

    enc['init_state'] = DenseLayer(enc['context2init'],
                                   num_units=n_units,
                                   W=W_init2proj,
                                   b=b_init2proj,
                                   nonlinearity=tanh,
                                   name='decoder_init_state')
    if state is None:
        init_state = enc['init_state']
        init_state_rev = None  # if rev_state is None else init_state
        if not attention:
            # without attention the context is 2D; with attention it stays 3D
            context = enc['context2init']
    else:
        init_state = state
        init_state_rev = rev_state
        context = enc['context_input'] = \
            InputLayer((None, n_units), name='ctx_input')
    # (batch_size, nfeats)

    # (batch_size, valid ntsteps)
    enc['target'] = InputLayer((None, None), name='target')
    dec_mask = enc['target_mask'] = InputLayer((None, None),
                                               name='target_mask')

    # embed the target input (values()[-1] here would be 'target_mask')
    enc['target_emb'] = EmbeddingLayer(enc['target'],
                                       input_size=dim_word,
                                       output_size=n_embd,
                                       name='target_emb')
    prevdim = n_embd
    prev2rnn = enc.values()[-1]  # it's either emb or prev2rnn/noise

    decoder_unit = get_rnn_unit(prev2rnn,
                                dec_mask,
                                None,
                                init_state,
                                None,
                                n_units,
                                prefix='decoder_',
                                context=context,
                                attention=attention)
    enc.update(decoder_unit)

    if attention:
        ctxs = enc.values()[-1]
        ctxs_shape = ctxs.output_shape

        def get_ctx(x):
            return ctxs.ctx

        context = enc['context'] = ExpressionLayer(ctxs,
                                                   function=get_ctx,
                                                   output_shape=ctxs_shape,
                                                   name='context')

    # reshape for the feed-forward projection layers:
    # 2D shapes of (batch_size * num_steps, num_units/num_feats)
    enc['rnn2proj'] = rnn2proj = ReshapeLayer(enc.values()[-1], (-1, n_units),
                                              name='flatten_rnn2proj')

    enc['prev2proj'] = prev2proj = ReshapeLayer(prev2rnn, (-1, prevdim),
                                                name='flatten_prev')

    if isinstance(context, ExpressionLayer):
        ctx2proj = enc['ctx2proj'] = ReshapeLayer(context,
                                                  (-1, ctxs_shape[-1]),
                                                  name='flatten_ctxs')
    else:
        ctx2proj = context

    # load shared parameters
    W_rnn2proj, b_rnn2proj = GlorotUniform(), Constant(0.)
    W_prev2proj, b_prev2proj = GlorotUniform(), Constant(0.)
    W_ctx2proj, b_ctx2proj = GlorotUniform(), Constant(0.)

    # perturb the projections by dropout noise
    # (DropoutLayer takes p=, not sigma=; sigma belongs to GaussianNoiseLayer)
    if drop_p is not None:
        rnn2proj = enc['noise_rnn2proj'] = DropoutLayer(rnn2proj,
                                                        p=drop_p,
                                                        name='noise_rnn2proj')

        prev2proj = enc['drop_prev2proj'] = DropoutLayer(prev2proj,
                                                         p=drop_p,
                                                         name='drop_prev2proj')

        ctx2proj = enc['noise_ctx2proj'] = DropoutLayer(ctx2proj,
                                                        p=drop_p,
                                                        name='noise_ctx2proj')

    # project rnn
    enc['rnn_proj'] = DenseLayer(rnn2proj,
                                 num_units=n_proj,
                                 nonlinearity=linear,
                                 W=W_rnn2proj,
                                 b=b_rnn2proj,
                                 name='rnn_proj')

    # project raw targets
    enc['prev_proj'] = DenseLayer(prev2proj,
                                  num_units=n_proj,
                                  nonlinearity=linear,
                                  W=W_prev2proj,
                                  b=b_prev2proj,
                                  name='prev_proj')

    # project context
    enc['ctx_proj'] = DenseLayer(ctx2proj,
                                 num_units=n_proj,
                                 nonlinearity=linear,
                                 W=W_ctx2proj,
                                 b=b_ctx2proj,
                                 name='ctx_proj')

    # reshape back for merging
    n_batch = enc['input'].input_var.shape[0]
    rnn2merge = enc['rnn2merge'] = ReshapeLayer(enc['rnn_proj'],
                                                (n_batch, -1, n_proj),
                                                name='reshaped_rnn2proj')

    prev2merge = enc['prev2merge'] = ReshapeLayer(enc['prev_proj'],
                                                  (n_batch, -1, n_proj),
                                                  name='reshaped_prev')

    if isinstance(context, ExpressionLayer):
        ctx2merge = enc['ctx2merge'] = ReshapeLayer(enc['ctx_proj'],
                                                    (n_batch, -1, n_proj),
                                                    name='reshaped_ctx')
    else:
        ctx2merge = enc['ctx2merge'] = DimshuffleLayer(enc['ctx_proj'],
                                                       pattern=(0, 'x', 1),
                                                       name='reshaped_context')

    # combine projections into shape (batch_size, n_steps, n_proj)
    enc['proj_merge'] = ElemwiseMergeLayer([rnn2merge, prev2merge, ctx2merge],
                                           merge_function=tanh_add,
                                           name='proj_merge')

    # reshape for output regression projection
    enc['merge2proj'] = ReshapeLayer(enc.values()[-1], (-1, n_proj),
                                     name='flatten_proj_merge')

    # perturb concatenated regressors by noise
    if drop_p is not None:
        # if noise_type == 'binary':
        enc['noise_output'] = DropoutLayer(enc.values()[-1],
                                           p=drop_p,
                                           name='noise_output')

    # regress on combined (perturbed) projections
    out = get_output_unit(enc['target'], enc.values()[-1], dim_word)
    enc.update(out)  # update graph

    return enc
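`tanh_add` comes from the surrounding project and is not shown here. Since ElemwiseMergeLayer reduces its merge_function pairwise over the incoming layers, a plausible reconstruction (an assumption, not the source's definition) is:

import theano.tensor as T

def tanh_add(a, b):
    # hypothetical reconstruction: with three incomings, ElemwiseMergeLayer
    # computes tanh_add(tanh_add(rnn, prev), ctx) via a pairwise reduce
    return T.tanh(a + b)

Note the caveat this exposes: with a nonlinearity inside the merge function, pairwise reduction yields tanh(tanh(a + b) + c) rather than tanh(a + b + c), so the real helper may be defined differently.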
Example #8
def build_wavenet(n_channels,
                  seq_length,
                  specs,
                  out_dim=256,
                  out_fn=softmax,
                  latent=None):
    net = OrderedDict()

    if latent:
        net['latent'] = latent

    net['input'] = InputLayer((None, seq_length, n_channels), name='input')
    input_shape = net['input'].input_var.shape
    net['input_dimshfl'] = dimshuffle(net.values()[-1],
                                      pattern=(0, 2, 'x', 1),
                                      name='input_dimshfl')

    nfilts, fsize = specs.pop(0)
    net['causal_conv_0'] = pad(conv(net.values()[-1],
                                    num_filters=nfilts,
                                    filter_size=(1, fsize)),
                               width=[(fsize - 1, 0)],
                               val=0,
                               batch_ndim=3,
                               name='causal_conv')

    skips = []
    for i, spec in enumerate(specs):
        l_inp = net.values()[-1]
        # pass the current spec directly; popping from `specs` while
        # iterating over it would skip every other block
        skip, wavenet_block = build_wavenet_block(l_inp,
                                                  i + 1,
                                                  spec,
                                                  latent=latent)
        skips.append(skip)
        net.update(wavenet_block)

    net['skip_merge'] = NonlinearityLayer(ElemwiseMergeLayer([l_inp] + skips,
                                                             T.add),
                                          nonlinearity=rectify,
                                          name='skip_merge')

    net['pre_out'] = conv(net.values()[-1],
                          num_filters=nfilts,
                          filter_size=1,
                          nonlinearity=rectify,
                          name='pre_out')

    # num_filters = out_dim
    net['output'] = conv(net.values()[-1],
                         num_filters=out_dim,
                         filter_size=1,
                         nonlinearity=out_fn,
                         name='output')

    net['output_dimshfl'] = dimshuffle(net.values()[-1],
                                       pattern=(0, 3, 1, 2),
                                       name='output_dimshfl')

    output_shape = (input_shape[0], input_shape[1], out_dim)
    net['output_reshape'] = reshape(net.values()[-1],
                                    shape=output_shape,
                                    name='output_reshape')

    return net, net.values()[-1]
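The causal convolution at the input can be sanity-checked in isolation: a valid (1, fsize) convolution shortens the time axis by fsize - 1, and the asymmetric left pad restores the length, so an output at time t never sees future samples. A sketch with assumed shapes:

from lasagne.layers import InputLayer, Conv2DLayer, PadLayer

fsize = 4
l_in = InputLayer((None, 1, 1, 100))                 # (batch, channels, 1, time)
l_conv = Conv2DLayer(l_in, num_filters=8,
                     filter_size=(1, fsize))         # valid conv: 100 -> 97
l_causal = PadLayer(l_conv, width=[(fsize - 1, 0)],
                    val=0, batch_ndim=3)             # left-pad time back to 100
assert l_causal.output_shape == (None, 8, 1, 100)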
Example #9
def build_wavenet_block(incoming, i, specs, latent=None):
    # input (batch_size, num_units, 1, seq_len)
    net = OrderedDict()

    # dilated causal convolution
    nfilts = incoming.output_shape[1]
    fsize, dil = specs  # nfilts = 2p, fsize=(1,fs), dil=(1,d)
    net['dil_causal_conv_{}'.format(i)] = pad(dilate(incoming,
                                                     num_filters=nfilts,
                                                     filter_size=(1, fsize),
                                                     dilation=(1, dil)),
                                              width=[(dil, 0)],
                                              val=0,
                                              batch_ndim=3,
                                              name='dil_causal_conv')
    l_inp = net.values()[-1]

    if isinstance(latent, InputLayer):
        if len(latent.output_shape) == 2:
            net['lat_proj_{}'.format(i)] = DenseLayer(latent,
                                                      num_units=nfilts,
                                                      name='lat_proj')

            # dimshuffle to match the spatial dimensions
            net['dimshfl_{}'.format(i)] = dimshuffle(net.values()[-1],
                                                     pattern=(0, 1, 'x', 'x'),
                                                     name='dimshfl')
            lat = repeat([l_inp, net.values()[-1]], name='repeat')
        elif len(latent.output_shape) == 4:
            # y = f(h), using upsampling with deconv layer
            # input_l = lasagne.layers.helper.get_all_layers(l_inp)[0]
            fsize = l_inp.output_shape[-1] - latent.output_shape[-1] + 1
            lat = net['lat_proj_{}'.format(i)] = deconv(latent,
                                                        num_filters=nfilts,
                                                        filter_size=(1, fsize),
                                                        name='lat_proj')
        else:
            raise NotImplementedError
        # print(l_inp.output_shape, lat.output_shape)
        l_inp = net['lat_merge_{}'.format(i)] = ElemwiseMergeLayer(
            [l_inp, lat], merge_function=T.add, name='lat_merge')

    # print(l_inp.output_shape)
    # tanh gate
    tanh_sl = net['tanh_slice_{}'.format(i)] = NonlinearityLayer(
        SliceLayer(l_inp, indices=slice(0, nfilts // 2), axis=1),
        nonlinearity=tanh,
        name='tanh_slice')

    # sigmoid gate
    sigmoid_sl = net['sigmoid_slice_{}'.format(i)] = NonlinearityLayer(
        SliceLayer(l_inp, indices=slice(nfilts // 2, None), axis=1),
        nonlinearity=sigmoid,
        name='sigmoid_slice')

    # element-wise merging by product
    net['prod_merge_{}'.format(i)] = ElemwiseMergeLayer([tanh_sl, sigmoid_sl],
                                                        T.mul,
                                                        name='prod_merge')

    # skip connection
    skip = net['full_conv_{}'.format(i)] = conv(net.values()[-1],
                                                num_filters=nfilts,
                                                filter_size=1,
                                                nonlinearity=linear,
                                                name='full_conv')

    # element-wise merging by addition
    net['res_out_{}'.format(i)] = ElemwiseMergeLayer([incoming, skip],
                                                     T.add,
                                                     name='res_out')

    # print(net.values()[-1].input_shapes)
    return skip, net
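The padding arithmetic above (width=[(dil, 0)]) is consistent with a filter size of 2 along the time axis: a dilated valid convolution then shortens the sequence by exactly dil steps, and the left pad restores it. A sketch with Lasagne's DilatedConv2DLayer, assuming the `dilate` helper above aliases it:

from lasagne.layers import InputLayer, DilatedConv2DLayer, PadLayer

dil = 4
l_in = InputLayer((None, 16, 1, 64))
l_dil = DilatedConv2DLayer(l_in, num_filters=16,
                           filter_size=(1, 2), dilation=(1, dil))  # 64 -> 60
l_causal = PadLayer(l_dil, width=[(dil, 0)], val=0, batch_ndim=3)  # back to 64
assert l_causal.output_shape == (None, 16, 1, 64)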