Example #1
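# Character-level convolutional language model: one-hot inputs are projected to
# charcnn_size * 2 channels by a width-1 convolution, gated down to charcnn_size
# channels, refined by a stack of highway convolutions, and mapped to per-class
# probabilities by the final Linear + SoftMax.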
def make_model(n_classes, charcnn_size, charcnn_layers):
    layers = [
        OneHot(n_classes + 1),
        LayoutRNNToCNN(),
        Convolution1d(1, charcnn_size * 2, n_classes + 1,
                      name="decconvresize"),
        BatchNormalization(charcnn_size * 2, name="decbnresize"),
        Gated(),
    ]
    for i in range(charcnn_layers):
        layers.append(
            HighwayConvolution1d(3,
                                 charcnn_size,
                                 dilation=1,
                                 name="decconv%d" % i))
    layers.extend([
        LayoutCNNToRNN(),
        Linear(charcnn_size, n_classes, name="classifier"),
        SoftMax()
    ])

    model = LMModel(layers)

    return model
Example #2
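# Variational autoencoder with two architectures. "conv": two stride-2
# convolutions encode the sequence (hence the sample_size % 4 == 0 check), a
# Sampler draws z, and two deconvolutions decode it back to per-step class
# probabilities. "rnn": an LNLSTM encodes the input to z, and a conditional
# LNLSTM decodes it while reading word-dropout inputs that are prefixed with a
# start token and shifted by one step.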
def make_model(z, net, sample_size, p, n_classes):
    if net == "conv":
        assert sample_size % 4 == 0
        layers = [
            OneHot(n_classes),
            LayoutRNNToCNN(),
            Convolution1d(3,
                          128,
                          n_classes,
                          pad=1,
                          stride=2,
                          causal=False,
                          name="conv1"),
            BatchNormalization(128, name="bn1"),
            ReLU(),
            Convolution1d(3,
                          256,
                          128,
                          pad=1,
                          stride=2,
                          causal=False,
                          name="conv2"),
            BatchNormalization(256, name="bn2"),
            ReLU(),
            Flatten(),
            Linear(sample_size // 4 * 256, z * 2, name="fc_encode"),
            Sampler(z),
            Linear(z, sample_size // 4 * 256, name="fc_decode"),
            ReLU(),
            Reshape((-1, 256, sample_size // 4, 1)),
            Deconvolution1D(256, 128, 3, pad=1, stride=2, name="deconv2"),
            BatchNormalization(128, name="deconv_bn2"),
            ReLU(),
            Deconvolution1D(128, 200, 3, pad=1, stride=2, name="deconv1"),
            BatchNormalization(200, name="deconv_bn1"),
            ReLU(),
            LayoutCNNToRNN(),
            Linear(200, n_classes, name="classifier"),
            SoftMax()
        ]
    elif net == "rnn":
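        # two extra symbol ids beyond the vocabulary: a start-of-sequence token
        # and the placeholder that Dropword substitutes for dropped inputs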
        start_word = n_classes
        dummy_word = n_classes + 1
        layers = [
            Parallel(
                [[
                    OneHot(n_classes),
                    LNLSTM(n_classes, 500, name="enc"),
                    lambda x: x[-1],
                    Linear(500, z * 2, name="encoder_fc"),
                    Sampler(z),
                ],
                 [
                     Dropword(p, dummy_word=dummy_word),
                     lambda x: T.concatenate([
                         T.ones((1, x.shape[1]), dtype='int32') * start_word, x
                     ],
                                             axis=0),
                     lambda x: x[:-1],
                     OneHot(n_classes + 2),
                 ]]),
            ConditionalDecoderLNLSTM(n_classes + 2,
                                     z,
                                     500,
                                     name="dec",
                                     steps=sample_size),
            Linear(500, n_classes, name="classifier"),
            SoftMax()
        ]
    else:
        raise Exception("unknown net %s" % net)

    model = LMReconstructionModel(layers, aux_loss=False)

    return model
Example #3
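# Hybrid VAE: a five-layer strided-convolution encoder produces the latent
# code, a mirrored deconvolution stack expands it back to one 200-dimensional
# vector per timestep (with an auxiliary classifier and Store on that path for
# the extra reconstruction loss), and an LNLSTM reads those vectors
# concatenated with word-dropout inputs to predict the output symbols.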
def make_model(z, sample_size, dropword_p, n_classes, lstm_size, alpha):
    encoder = [
        OneHot(n_classes),
        LayoutRNNToCNN(),
        Convolution1d(3, 128, n_classes, pad=1, stride=2, causal=False, name="conv1"),
        BatchNormalization(128, name="bn1"),
        ReLU(),
        Convolution1d(3, 256, 128, pad=1, stride=2, causal=False, name="conv2"),
        BatchNormalization(256, name="bn2"),
        ReLU(),
        Convolution1d(3, 512, 256, pad=1, stride=2, causal=False, name="conv3"),
        BatchNormalization(512, name="bn3"),
        ReLU(),
        Convolution1d(3, 512, 512, pad=1, stride=2, causal=False, name="conv4"),
        BatchNormalization(512, name="bn4"),
        ReLU(),
        Convolution1d(3, 512, 512, pad=1, stride=2, causal=False, name="conv5"),
        BatchNormalization(512, name="bn5"),
        ReLU(),
        Flatten(),
        Linear(sample_size // (2**5) * 512, z * 2, name="fc_encode"),
        Sampler(z),
    ]
    decoder_from_z = [
        Linear(z, sample_size // (2**5) * 512, name="fc_decode"),
        ReLU(),
        Reshape((-1, 512, sample_size // (2**5), 1)),
        Deconvolution1D(512, 512, 3, pad=1, stride=2, name="deconv5"),
        BatchNormalization(512, name="deconv_bn5"),
        ReLU(),
        Deconvolution1D(512, 512, 3, pad=1, stride=2, name="deconv4"),
        BatchNormalization(512, name="deconv_bn4"),
        ReLU(),
        Deconvolution1D(512, 256, 3, pad=1, stride=2, name="deconv3"),
        BatchNormalization(256, name="deconv_bn3"),
        ReLU(),
        Deconvolution1D(256, 128, 3, pad=1, stride=2, name="deconv2"),
        BatchNormalization(128, name="deconv_bn2"),
        ReLU(),
        Deconvolution1D(128, 200, 3, pad=1, stride=2, name="deconv1"),
        BatchNormalization(200, name="deconv_bn1"),
        ReLU(),
        LayoutCNNToRNN(),
        Parallel([
            [
                Linear(200, n_classes, name="aux_classifier"),
                SoftMax(),
                Store()
            ],
            []
        ], shared_input=True),
        lambda x: x[1]
    ]

    start_word = n_classes
    dummy_word = n_classes + 1
    decoder_from_words = [
        Dropword(dropword_p, dummy_word=dummy_word),
        lambda x: T.concatenate([T.ones((1, x.shape[1]), dtype='int32') * start_word, x], axis=0),
        lambda x: x[:-1],
        OneHot(n_classes+2),
    ]
    layers = [
        Parallel([
            encoder,
            []
        ], shared_input=True),
        Parallel([
            decoder_from_z,
            decoder_from_words
        ], shared_input=False),
        lambda x: T.concatenate(x, axis=2),
        LNLSTM(200+n_classes+2, lstm_size, name="declstm"),
        Linear(lstm_size, n_classes, name="classifier"),
        SoftMax()
    ]

    model = LMReconstructionModel(layers, aux_loss=True, alpha=alpha)

    return model
Example #4
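# Generation script: loads a trained model and builds a batch of latent codes
# according to `mode`: a grid over a 2-D latent space mapped through the
# inverse Gaussian CDF ("manifold"), a sweep over one latent dimension of an
# encoded sentence ("vary"), a linear interpolation between the codes of two
# sentences ("interpolate"), or random draws from the prior. Each code is then
# decoded greedily, one character at a time, by the unrolled LSTM step.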
def main(z, sample_size, p, encdec_layers, lstm_size, pad_string, mode, alpha):
    vocab = pickle.load(open("data/char_vocab.pkl", "rb"))

    train_db = LmReconstructionDatabase("train",
                                        batches_per_epoch=1000,
                                        sample_size=sample_size,
                                        random_samples=False)
    valid_db = LmReconstructionDatabase("valid",
                                        batches_per_epoch=100,
                                        sample_size=sample_size,
                                        random_samples=False)

    model = make_model(z, sample_size, p, train_db.n_classes, encdec_layers,
                       lstm_size, alpha)
    name = "lm.charvae.z_%d.len_%d.layers_%d.p_%.2f.alpha_%.2f.lstmsz_%d" % \
           (z, sample_size, encdec_layers, p, alpha, lstm_size)
    model.load("exp/%s/model.flt" % name)
    model.set_phase(train=False)

    start_word = train_db.n_classes

    if mode == 'manifold':
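        # steps x steps grid over the unit square, mapped to latent space
        # through the inverse Gaussian CDF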
        assert z == 2
        steps = 10
        eps = 0.001
        x = numpy.linspace(eps, 1 - eps, num=steps)
        y = numpy.linspace(eps, 1 - eps, num=steps)
        n = steps**2
        xy = [(i, j) for i in x for j in y]
        xy = numpy.asarray(xy)
        sampled = norm.ppf(xy)
    elif mode == 'vary':
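        # encode one sentence and sweep a single random latent dimension
        # across inverse-CDF-spaced values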
        dim = numpy.random.randint(z)
        print "dimension %d" % dim
        s = "<unk> caller to a local radio station said cocaine"
        s = to_inputs(s, vocab, sample_size)
        encoder = model.layers[0].branches[0]
        sampler = encoder[-1]
        assert isinstance(sampler, Sampler)
        ins = s[:, None]
        x = T.imatrix()
        z = encoder(x)
        mu = sampler.mu
        f = theano.function([x], mu)
        z = f(ins.astype('int32'))
        s_z = z[0]
        n = 15
        eps = 0.001
        x = numpy.linspace(eps, 1 - eps, num=n)
        x = norm.ppf(x)
        sampled = numpy.repeat(s_z[None, :], n, axis=0)
        sampled[:, dim] = x
    elif mode == 'interpolate':
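        # encode two sentences and interpolate linearly between their
        # latent means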
        s1 = "<unk> caller to a local radio station said cocaine"
        s2 = "giving up some of its gains as the dollar recovered"
        s1 = to_inputs(s1, vocab, sample_size)
        s2 = to_inputs(s2, vocab, sample_size)
        encoder = model.layers[0].branches[0]
        sampler = encoder[-1]
        assert isinstance(sampler, Sampler)
        ins = numpy.zeros((sample_size, 2))
        ins[:, 0] = s1
        ins[:, 1] = s2
        x = T.imatrix()
        z = encoder(x)
        mu = sampler.mu
        f = theano.function([x], mu)
        z = f(ins.astype('int32'))
        s1_z = z[0]
        s2_z = z[1]
        n = 15
        s1_z = numpy.repeat(s1_z[None, :], n, axis=0)
        s2_z = numpy.repeat(s2_z[None, :], n, axis=0)
        steps = numpy.linspace(0, 1, n)[:, None]
        sampled = s1_z * (1 - steps) + s2_z * steps
    else:
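        # default: sample latent codes from the standard normal prior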
        n = 100
        sampled = numpy.random.normal(0, 1, (n, z))

    start_words = numpy.ones(n) * start_word
    start_words = theano.shared(start_words.astype('int32'))
    sampled = theano.shared(sampled.astype(theano.config.floatX))

    decoder_from_z = model.layers[1].branches[0]
    from_z = decoder_from_z(sampled.astype(theano.config.floatX))

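    # unroll the recurrent part of the decoder one step at a time, feeding the
    # greedy argmax prediction back in as the next input symbol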
    layers = model.layers[-3:]
    layers[0] = LNLSTMStep(layers[0])
    step = Sequential(layers)
    onehot = OneHot(train_db.n_classes + 2)

    words = start_words
    generated = []
    for i in range(sample_size):
        ins = T.concatenate([from_z[i], onehot(words)], axis=1)
        pred = step(ins)
        words = T.argmax(pred, axis=1)
        generated.append(words[None, :])

    generated = T.concatenate(generated, axis=0)
    f = theano.function([], outputs=generated)
    w = f()

    if pad_string not in vocab.word_to_index:
        vocab.add(pad_string)
    else:
        raise Exception("%s is already in the vocabulary" % pad_string)

    results = []

    for i in range(w.shape[1]):
        s = [vocab.by_index(idx) for idx in w[:, i]]
        r = ''.join(s)
        print(r)
        results.append(r)

    if mode == 'manifold':
        lines = 3
        steps = int(numpy.sqrt(n))
        for i in range(steps):
            for k in range(lines):
                for j in range(steps):
                    r = results[i * steps + j]
                    l = len(r) // lines
                    print(r[k * l:(k + 1) * l], '  ', end='')
                print()
            print()
Example #5
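# Hybrid VAE as in Example #3, but with a selectable encoder/decoder depth
# (two or three strided convolution layers) and a convolutional rather than a
# recurrent final decoder: the deconvolution output, concatenated with the
# word-dropout inputs, goes through a gated width-1 resize convolution and a
# stack of highway convolutions before the classifier.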
def make_model(z, sample_size, dropword_p, n_classes, encdec_layers,
               charcnn_size, charcnn_layers, alpha):
    assert sample_size % (2**encdec_layers) == 0
    if encdec_layers == 2:
        encoder = [
            OneHot(n_classes),
            LayoutRNNToCNN(),
            Convolution1d(3,
                          128,
                          n_classes,
                          pad=1,
                          stride=2,
                          causal=False,
                          name="conv1"),
            BatchNormalization(128, name="bn1", collect=False),
            ReLU(),
            Convolution1d(3,
                          256,
                          128,
                          pad=1,
                          stride=2,
                          causal=False,
                          name="conv2"),
            BatchNormalization(256, name="bn2", collect=False),
            ReLU(),
            Flatten(),
            Linear(sample_size // 4 * 256, z * 2, name="fc_encode"),
            Sampler(z),
        ]
        decoder_from_z = [
            Linear(z, sample_size // 4 * 256, name="fc_decode"),
            ReLU(),
            Reshape((-1, 256, sample_size // 4, 1)),
            Deconvolution1D(256, 128, 3, pad=1, stride=2, name="deconv2"),
            BatchNormalization(128, name="deconv_bn2", collect=False),
            ReLU(),
            Deconvolution1D(128, 200, 3, pad=1, stride=2, name="deconv1"),
            BatchNormalization(200, name="deconv_bn1", collect=False),
            ReLU(),
            LayoutCNNToRNN(),
            Parallel([[
                Linear(200, n_classes, name="aux_classifier"),
                SoftMax(),
                Store()
            ], []],
                     shared_input=True), lambda x: x[1]
        ]
    elif encdec_layers == 3:
        encoder = [
            OneHot(n_classes),
            LayoutRNNToCNN(),
            Convolution1d(3,
                          128,
                          n_classes,
                          pad=1,
                          stride=2,
                          causal=False,
                          name="conv1"),
            BatchNormalization(128, name="bn1"),
            ReLU(),
            Convolution1d(3,
                          256,
                          128,
                          pad=1,
                          stride=2,
                          causal=False,
                          name="conv2"),
            BatchNormalization(256, name="bn2"),
            ReLU(),
            Convolution1d(3,
                          512,
                          256,
                          pad=1,
                          stride=2,
                          causal=False,
                          name="conv3"),
            BatchNormalization(512, name="bn3"),
            ReLU(),
            Flatten(),
            Linear(sample_size // 8 * 512, z * 2, name="fc_encode"),
            Sampler(z),
        ]
        decoder_from_z = [
            Linear(z, sample_size // 8 * 512, name="fc_decode"),
            ReLU(),
            Reshape((-1, 512, sample_size // 8, 1)),
            Deconvolution1D(512, 256, 3, pad=1, stride=2, name="deconv3"),
            BatchNormalization(256, name="deconv_bn3", collect=False),
            ReLU(),
            Deconvolution1D(256, 128, 3, pad=1, stride=2, name="deconv2"),
            BatchNormalization(128, name="deconv_bn2", collect=False),
            ReLU(),
            Deconvolution1D(128, 200, 3, pad=1, stride=2, name="deconv1"),
            BatchNormalization(200, name="deconv_bn1", collect=False),
            ReLU(),
            LayoutCNNToRNN(),
            Parallel([[
                Linear(200, n_classes, name="aux_classifier"),
                SoftMax(),
                Store()
            ], []],
                     shared_input=True), lambda x: x[1]
        ]
    else:
        raise Exception("unsupported number of encdec layers %d" %
                        encdec_layers)

    start_word = n_classes
    dummy_word = n_classes + 1
    decoder_from_words = [
        Dropword(dropword_p, dummy_word=dummy_word),
        lambda x: T.concatenate(
            [T.ones((1, x.shape[1]), dtype='int32') * start_word, x], axis=0),
        lambda x: x[:-1],
        OneHot(n_classes + 2),
    ]
    layers = [
        Parallel([encoder, []], shared_input=True),
        Parallel([decoder_from_z, decoder_from_words], shared_input=False),
        lambda x: T.concatenate(x, axis=2),
        LayoutRNNToCNN(),
        Convolution1d(1,
                      charcnn_size * 2,
                      200 + n_classes + 2,
                      name="decconvresize"),
        BatchNormalization(charcnn_size * 2, name="decbnresize"),
        Gated(),
    ]
    for i in range(charcnn_layers):
        layers.append(
            HighwayConvolution1d(3,
                                 charcnn_size,
                                 dilation=1,
                                 name="decconv%d" % i))
    layers.extend([
        LayoutCNNToRNN(),
        Linear(charcnn_size, n_classes, name="classifier"),
        SoftMax()
    ])

    model = LMReconstructionModel(layers, aux_loss=True, alpha=alpha)

    return model