Example #1
0
def make_model(z, sample_size, dropword_p, n_classes, lstm_size, alpha):
    """Build the convolutional-encoder / LSTM-decoder reconstruction model.

    The encoder stacks five stride-2 ``Convolution1d`` stages, flattens the
    result and maps it through ``fc_encode`` into a ``Sampler``.  The decoder
    mirrors it with five stride-2 ``Deconvolution1D`` stages ending in 200
    channels.  That output is concatenated (axis 2) with the shifted, one-hot
    encoded input words and passed to the ``declstm`` layer and the final
    ``classifier``.

    Args:
        z: Size handed to ``Sampler``; ``fc_encode`` produces ``z * 2`` units.
        sample_size: Sequence length.  All fully connected and reshape sizes
            are derived from ``sample_size // 2**5``, so it must be a
            positive multiple of 32.
        dropword_p: First argument of ``Dropword``.
        n_classes: Number of symbol classes.  Ids ``n_classes`` and
            ``n_classes + 1`` are used as start word and dummy word.
        lstm_size: Output size of the ``LNLSTM`` decoder layer.
        alpha: Forwarded unchanged to ``LMReconstructionModel``.

    Returns:
        An ``LMReconstructionModel`` created with ``aux_loss=True``.

    Raises:
        ValueError: If ``sample_size`` is not a positive multiple of 32.
    """
    downsampling = 2 ** 5  # five stride-2 stages
    if sample_size <= 0 or sample_size % downsampling != 0:
        # Fail fast: the sizes below silently truncate via floor division,
        # which would otherwise only surface later as a size mismatch.
        raise ValueError(
            "sample_size must be a positive multiple of %d, got %r"
            % (downsampling, sample_size))

    reduced_len = sample_size // downsampling
    flat_size = reduced_len * 512

    encoder = [
        OneHot(n_classes),
        LayoutRNNToCNN(),
        Convolution1d(3, 128, n_classes, pad=1, stride=2, causal=False, name="conv1"),
        BatchNormalization(128, name="bn1"),
        ReLU(),
        Convolution1d(3, 256, 128, pad=1, stride=2, causal=False, name="conv2"),
        BatchNormalization(256, name="bn2"),
        ReLU(),
        Convolution1d(3, 512, 256, pad=1, stride=2, causal=False, name="conv3"),
        BatchNormalization(512, name="bn3"),
        ReLU(),
        Convolution1d(3, 512, 512, pad=1, stride=2, causal=False, name="conv4"),
        BatchNormalization(512, name="bn4"),
        ReLU(),
        Convolution1d(3, 512, 512, pad=1, stride=2, causal=False, name="conv5"),
        BatchNormalization(512, name="bn5"),
        ReLU(),
        Flatten(),
        Linear(flat_size, z * 2, name="fc_encode"),
        Sampler(z),
    ]
    decoder_from_z = [
        Linear(z, flat_size, name="fc_decode"),
        ReLU(),
        Reshape((-1, 512, reduced_len, 1)),
        Deconvolution1D(512, 512, 3, pad=1, stride=2, name="deconv5"),
        BatchNormalization(512, name="deconv_bn5"),
        ReLU(),
        Deconvolution1D(512, 512, 3, pad=1, stride=2, name="deconv4"),
        BatchNormalization(512, name="deconv_bn4"),
        ReLU(),
        Deconvolution1D(512, 256, 3, pad=1, stride=2, name="deconv3"),
        BatchNormalization(256, name="deconv_bn3"),
        ReLU(),
        Deconvolution1D(256, 128, 3, pad=1, stride=2, name="deconv2"),
        BatchNormalization(128, name="deconv_bn2"),
        ReLU(),
        Deconvolution1D(128, 200, 3, pad=1, stride=2, name="deconv1"),
        BatchNormalization(200, name="deconv_bn1"),
        ReLU(),
        LayoutCNNToRNN(),
        # Side branch: an auxiliary classifier whose output goes to Store();
        # the empty second branch passes the features through, and only that
        # second result is kept by the lambda below.
        Parallel([
            [
                Linear(200, n_classes, name="aux_classifier"),
                SoftMax(),
                Store()
            ],
            []
        ], shared_input=True),
        lambda x: x[1]
    ]

    # Two ids beyond the real classes, hence OneHot(n_classes + 2) below.
    start_word = n_classes
    dummy_word = n_classes + 1
    decoder_from_words = [
        Dropword(dropword_p, dummy_word=dummy_word),
        # Prepend a row of start_word ids, then drop the last row so the
        # sequence keeps its length but is shifted by one position.
        lambda x: T.concatenate([T.ones((1, x.shape[1]), dtype='int32') * start_word, x], axis=0),
        lambda x: x[:-1],
        OneHot(n_classes+2),
    ]
    layers = [
        # Keep the raw input next to the encoder output for the word branch.
        Parallel([
            encoder,
            []
        ], shared_input=True),
        Parallel([
            decoder_from_z,
            decoder_from_words
        ], shared_input=False),
        lambda x: T.concatenate(x, axis=2),
        LNLSTM(200+n_classes+2, lstm_size, name="declstm"),
        Linear(lstm_size, n_classes, name="classifier"),
        SoftMax()
    ]

    model = LMReconstructionModel(layers, aux_loss=True, alpha=alpha)

    return model
Example #2
0
def make_model(z, net, sample_size, p, n_classes):
    """Build a reconstruction model with either a conv or an rnn layout.

    ``net == "conv"``: two stride-2 ``Convolution1d`` stages, ``fc_encode``,
    a ``Sampler``, ``fc_decode``, two stride-2 ``Deconvolution1D`` stages and
    a ``classifier``.

    ``net == "rnn"``: an ``LNLSTM`` encoder whose last element (``x[-1]``)
    feeds ``encoder_fc`` and a ``Sampler``, run in ``Parallel`` with the
    shifted, one-hot encoded input words; both go into a
    ``ConditionalDecoderLNLSTM`` followed by a ``classifier``.

    Args:
        z: Size handed to ``Sampler``; the preceding ``Linear`` layer
            produces ``z * 2`` units.
        net: Architecture selector, ``"conv"`` or ``"rnn"``.
        sample_size: Sequence length.  Must be divisible by 4 for ``"conv"``;
            passed as ``steps`` to the decoder for ``"rnn"``.
        p: First argument of ``Dropword`` (``"rnn"`` only).
        n_classes: Number of symbol classes.  In the ``"rnn"`` layout ids
            ``n_classes`` and ``n_classes + 1`` are used as start word and
            dummy word.

    Returns:
        An ``LMReconstructionModel`` created with ``aux_loss=False``.

    Raises:
        AssertionError: If ``net == "conv"`` and ``sample_size % 4 != 0``.
        ValueError: If ``net`` is neither ``"conv"`` nor ``"rnn"``.
    """
    if net == "conv":
        assert sample_size % 4 == 0
        # Floor division keeps these layer sizes integral; true division
        # ("/") yields floats on Python 3.
        quarter_len = sample_size // 4
        flat_size = quarter_len * 256
        layers = [
            OneHot(n_classes),
            LayoutRNNToCNN(),
            Convolution1d(3,
                          128,
                          n_classes,
                          pad=1,
                          stride=2,
                          causal=False,
                          name="conv1"),
            BatchNormalization(128, name="bn1"),
            ReLU(),
            Convolution1d(3,
                          256,
                          128,
                          pad=1,
                          stride=2,
                          causal=False,
                          name="conv2"),
            BatchNormalization(256, name="bn2"),
            ReLU(),
            Flatten(),
            Linear(flat_size, z * 2, name="fc_encode"),
            Sampler(z),
            Linear(z, flat_size, name="fc_decode"),
            ReLU(),
            Reshape((-1, 256, quarter_len, 1)),
            Deconvolution1D(256, 128, 3, pad=1, stride=2, name="deconv2"),
            BatchNormalization(128, name="deconv_bn2"),
            ReLU(),
            Deconvolution1D(128, 200, 3, pad=1, stride=2, name="deconv1"),
            BatchNormalization(200, name="deconv_bn1"),
            ReLU(),
            LayoutCNNToRNN(),
            Linear(200, n_classes, name="classifier"),
            SoftMax()
        ]
    elif net == "rnn":
        # Two ids beyond the real classes, hence OneHot(n_classes + 2) below.
        start_word = n_classes
        dummy_word = n_classes + 1
        layers = [
            Parallel(
                [[
                    OneHot(n_classes),
                    LNLSTM(n_classes, 500, name="enc"),
                    # Keep only the last element along the first axis.
                    lambda x: x[-1],
                    Linear(500, z * 2, name="encoder_fc"),
                    Sampler(z),
                ],
                 [
                     Dropword(p, dummy_word=dummy_word),
                     # Prepend a row of start_word ids, then drop the last
                     # row: same length, shifted by one position.
                     lambda x: T.concatenate([
                         T.ones((1, x.shape[1]), dtype='int32') * start_word, x
                     ],
                                             axis=0),
                     lambda x: x[:-1],
                     OneHot(n_classes + 2),
                 ]]),
            ConditionalDecoderLNLSTM(n_classes + 2,
                                     z,
                                     500,
                                     name="dec",
                                     steps=sample_size),
            Linear(500, n_classes, name="classifier"),
            SoftMax()
        ]
    else:
        # ValueError is still caught by callers using ``except Exception``.
        raise ValueError("unknown net %s" % net)

    model = LMReconstructionModel(layers, aux_loss=False)

    return model
Example #3
0
def make_model(z, sample_size, dropword_p, n_classes, encdec_layers,
               charcnn_size, charcnn_layers, alpha):
    """Build the conv-encoder model whose decoder is a highway CNN stack.

    The encoder/decoder pair uses ``encdec_layers`` (2 or 3) stride-2
    ``Convolution1d`` / ``Deconvolution1D`` stages around a ``Sampler``.
    The 200-channel decoder output is concatenated (axis 2) with the
    shifted, one-hot encoded input words, resized by a width-1 convolution
    plus ``Gated``, refined by ``charcnn_layers`` ``HighwayConvolution1d``
    layers and classified.

    Args:
        z: Size handed to ``Sampler``; ``fc_encode`` produces ``z * 2`` units.
        sample_size: Sequence length; asserted to be divisible by
            ``2**encdec_layers``.
        dropword_p: First argument of ``Dropword``.
        n_classes: Number of symbol classes.  Ids ``n_classes`` and
            ``n_classes + 1`` are used as start word and dummy word.
        encdec_layers: Number of encoder/decoder stages, 2 or 3.
        charcnn_size: Channel count of the highway convolutions; the resize
            convolution produces twice as many channels ahead of ``Gated``.
        charcnn_layers: Number of ``HighwayConvolution1d`` layers.
        alpha: Forwarded unchanged to ``LMReconstructionModel``.

    Returns:
        An ``LMReconstructionModel`` created with ``aux_loss=True``.

    Raises:
        AssertionError: If ``sample_size`` is not divisible by
            ``2**encdec_layers``.
        Exception: If ``encdec_layers`` is neither 2 nor 3.
    """
    assert sample_size % (2**encdec_layers) == 0

    # Per-depth description of the stages.
    #   enc_plan rows: (stage number, output channels, input channels)
    #   dec_plan rows: (stage number, input channels, output channels)
    # Only the 2-stage encoder passes collect=False to its batch norms; the
    # 3-stage encoder leaves that argument at the library default.
    if encdec_layers == 2:
        shrink = 4
        enc_plan = ((1, 128, n_classes), (2, 256, 128))
        dec_plan = ((2, 256, 128), (1, 128, 200))
        enc_bn_options = {"collect": False}
    elif encdec_layers == 3:
        shrink = 8
        enc_plan = ((1, 128, n_classes), (2, 256, 128), (3, 512, 256))
        dec_plan = ((3, 512, 256), (2, 256, 128), (1, 128, 200))
        enc_bn_options = {}
    else:
        raise Exception("unsupported number of encdec layers %d" %
                        encdec_layers)

    bottleneck_channels = enc_plan[-1][1]
    reduced_len = sample_size // shrink
    flat_size = reduced_len * bottleneck_channels

    # Encoder: one-hot input, strided conv stages, then the sampler head.
    encoder = [OneHot(n_classes), LayoutRNNToCNN()]
    for stage, out_channels, in_channels in enc_plan:
        encoder += [
            Convolution1d(3, out_channels, in_channels,
                          pad=1, stride=2, causal=False,
                          name="conv%d" % stage),
            BatchNormalization(out_channels, name="bn%d" % stage,
                               **enc_bn_options),
            ReLU(),
        ]
    encoder += [
        Flatten(),
        Linear(flat_size, z * 2, name="fc_encode"),
        Sampler(z),
    ]

    # Decoder: expand the sample and undo the strided stages.
    decoder_from_z = [
        Linear(z, flat_size, name="fc_decode"),
        ReLU(),
        Reshape((-1, bottleneck_channels, reduced_len, 1)),
    ]
    for stage, in_channels, out_channels in dec_plan:
        decoder_from_z += [
            Deconvolution1D(in_channels, out_channels, 3,
                            pad=1, stride=2, name="deconv%d" % stage),
            BatchNormalization(out_channels, name="deconv_bn%d" % stage,
                               collect=False),
            ReLU(),
        ]
    decoder_from_z += [
        LayoutCNNToRNN(),
        # Auxiliary classifier on a side branch (its output goes to Store());
        # the empty branch passes the features on and is the one kept.
        Parallel(
            [
                [
                    Linear(200, n_classes, name="aux_classifier"),
                    SoftMax(),
                    Store(),
                ],
                [],
            ],
            shared_input=True),
        lambda branches: branches[1],
    ]

    # Word branch: prepend a row of start_word ids and drop the last row, so
    # the sequence keeps its length but is shifted by one position.
    start_word = n_classes
    dummy_word = n_classes + 1
    decoder_from_words = [
        Dropword(dropword_p, dummy_word=dummy_word),
        lambda words: T.concatenate(
            [T.ones((1, words.shape[1]), dtype='int32') * start_word, words],
            axis=0),
        lambda words: words[:-1],
        OneHot(n_classes + 2),
    ]

    front = [
        Parallel([encoder, []], shared_input=True),
        Parallel([decoder_from_z, decoder_from_words], shared_input=False),
        lambda parts: T.concatenate(parts, axis=2),
        LayoutRNNToCNN(),
        Convolution1d(1,
                      charcnn_size * 2,
                      200 + n_classes + 2,
                      name="decconvresize"),
        BatchNormalization(charcnn_size * 2, name="decbnresize"),
        Gated(),
    ]
    highway = [
        HighwayConvolution1d(3,
                             charcnn_size,
                             dilation=1,
                             name="decconv%d" % depth)
        for depth in range(charcnn_layers)
    ]
    tail = [
        LayoutCNNToRNN(),
        Linear(charcnn_size, n_classes, name="classifier"),
        SoftMax(),
    ]

    return LMReconstructionModel(front + highway + tail,
                                 aux_loss=True,
                                 alpha=alpha)