# NOTE: these snippets assume the repo's usual imports, e.g.
#   import pickle
#   import numpy
#   import theano
#   import theano.tensor as T
#   from scipy.stats import norm
# plus the layer classes (OneHot, Convolution1d, LNLSTM, ...) and model
# classes from the repo's own modules.

def make_model(n_classes, charcnn_size, charcnn_layers):
    # Character CNN language model: a 1x1 "resize" convolution with a gated
    # activation, followed by a stack of highway convolutions.
    layers = [
        OneHot(n_classes + 1),
        LayoutRNNToCNN(),
        Convolution1d(1, charcnn_size * 2, n_classes + 1, name="decconvresize"),
        BatchNormalization(charcnn_size * 2, name="decbnresize"),
        Gated(),
    ]
    for i in range(charcnn_layers):
        layers.append(HighwayConvolution1d(3, charcnn_size, dilation=1,
                                           name="decconv%d" % i))
    layers.extend([
        LayoutCNNToRNN(),
        Linear(charcnn_size, n_classes, name="classifier"),
        SoftMax(),
    ])
    model = LMModel(layers)
    return model
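# Usage sketch (hypothetical hyperparameters, not values taken from this
# repo):
#
#     model = make_model(n_classes=82, charcnn_size=512, charcnn_layers=4)
#
# n_classes would be the character vocabulary size; the +1 in OneHot
# presumably reserves one extra input symbol (e.g. the dummy token used by
# Dropword elsewhere in this codebase).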
def make_model(z, net, sample_size, p, n_classes):
    if net == "conv":
        # Fully convolutional VAE: two stride-2 convolutions halve the
        # sequence length twice, hence the divisibility requirement.
        assert sample_size % 4 == 0
        layers = [
            OneHot(n_classes),
            LayoutRNNToCNN(),
            Convolution1d(3, 128, n_classes, pad=1, stride=2, causal=False, name="conv1"),
            BatchNormalization(128, name="bn1"),
            ReLU(),
            Convolution1d(3, 256, 128, pad=1, stride=2, causal=False, name="conv2"),
            BatchNormalization(256, name="bn2"),
            ReLU(),
            Flatten(),
            Linear(sample_size // 4 * 256, z * 2, name="fc_encode"),
            Sampler(z),
            Linear(z, sample_size // 4 * 256, name="fc_decode"),
            ReLU(),
            Reshape((-1, 256, sample_size // 4, 1)),
            Deconvolution1D(256, 128, 3, pad=1, stride=2, name="deconv2"),
            BatchNormalization(128, name="deconv_bn2"),
            ReLU(),
            Deconvolution1D(128, 200, 3, pad=1, stride=2, name="deconv1"),
            BatchNormalization(200, name="deconv_bn1"),
            ReLU(),
            LayoutCNNToRNN(),
            Linear(200, n_classes, name="classifier"),
            SoftMax(),
        ]
    elif net == "rnn":
        # LSTM encoder/decoder VAE with word dropout on the decoder input.
        start_word = n_classes
        dummy_word = n_classes + 1
        layers = [
            Parallel([
                [
                    OneHot(n_classes),
                    LNLSTM(n_classes, 500, name="enc"),
                    lambda x: x[-1],
                    Linear(500, z * 2, name="encoder_fc"),
                    Sampler(z),
                ],
                [
                    Dropword(p, dummy_word=dummy_word),
                    # Prepend the start token and drop the last symbol so the
                    # decoder predicts each character from its predecessors.
                    lambda x: T.concatenate([T.ones((1, x.shape[1]), dtype='int32') * start_word, x], axis=0),
                    lambda x: x[:-1],
                    OneHot(n_classes + 2),
                ],
            ]),
            ConditionalDecoderLNLSTM(n_classes + 2, z, 500, name="dec", steps=sample_size),
            Linear(500, n_classes, name="classifier"),
            SoftMax(),
        ]
    else:
        raise Exception("unknown net %s" % net)
    model = LMReconstructionModel(layers, aux_loss=False)
    return model
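# Usage sketch (argument values are illustrative assumptions):
#
#     vae = make_model(z=32, net="conv", sample_size=52, p=0.5, n_classes=82)
#     vae = make_model(z=32, net="rnn", sample_size=52, p=0.5, n_classes=82)
#
# For net="conv", sample_size must be divisible by 4; p is only used by the
# "rnn" variant, where Dropword replaces decoder inputs with the dummy token
# with probability p.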
def make_model(z, sample_size, dropword_p, n_classes, lstm_size, alpha):
    # Hybrid VAE: convolutional encoder, deconvolutional decoder whose
    # features feed an LSTM together with the (word-dropped) ground truth.
    encoder = [
        OneHot(n_classes),
        LayoutRNNToCNN(),
        Convolution1d(3, 128, n_classes, pad=1, stride=2, causal=False, name="conv1"),
        BatchNormalization(128, name="bn1"),
        ReLU(),
        Convolution1d(3, 256, 128, pad=1, stride=2, causal=False, name="conv2"),
        BatchNormalization(256, name="bn2"),
        ReLU(),
        Convolution1d(3, 512, 256, pad=1, stride=2, causal=False, name="conv3"),
        BatchNormalization(512, name="bn3"),
        ReLU(),
        Convolution1d(3, 512, 512, pad=1, stride=2, causal=False, name="conv4"),
        BatchNormalization(512, name="bn4"),
        ReLU(),
        Convolution1d(3, 512, 512, pad=1, stride=2, causal=False, name="conv5"),
        BatchNormalization(512, name="bn5"),
        ReLU(),
        Flatten(),
        Linear(sample_size // (2 ** 5) * 512, z * 2, name="fc_encode"),
        Sampler(z),
    ]
    decoder_from_z = [
        Linear(z, sample_size // (2 ** 5) * 512, name="fc_decode"),
        ReLU(),
        Reshape((-1, 512, sample_size // (2 ** 5), 1)),
        Deconvolution1D(512, 512, 3, pad=1, stride=2, name="deconv5"),
        BatchNormalization(512, name="deconv_bn5"),
        ReLU(),
        Deconvolution1D(512, 512, 3, pad=1, stride=2, name="deconv4"),
        BatchNormalization(512, name="deconv_bn4"),
        ReLU(),
        Deconvolution1D(512, 256, 3, pad=1, stride=2, name="deconv3"),
        BatchNormalization(256, name="deconv_bn3"),
        ReLU(),
        Deconvolution1D(256, 128, 3, pad=1, stride=2, name="deconv2"),
        BatchNormalization(128, name="deconv_bn2"),
        ReLU(),
        Deconvolution1D(128, 200, 3, pad=1, stride=2, name="deconv1"),
        BatchNormalization(200, name="deconv_bn1"),
        ReLU(),
        LayoutCNNToRNN(),
        # Auxiliary classifier head; Store() keeps its prediction for the
        # auxiliary loss while the empty branch passes features through.
        Parallel([
            [
                Linear(200, n_classes, name="aux_classifier"),
                SoftMax(),
                Store(),
            ],
            [],
        ], shared_input=True),
        lambda x: x[1],
    ]
    start_word = n_classes
    dummy_word = n_classes + 1
    decoder_from_words = [
        Dropword(dropword_p, dummy_word=dummy_word),
        lambda x: T.concatenate([T.ones((1, x.shape[1]), dtype='int32') * start_word, x], axis=0),
        lambda x: x[:-1],
        OneHot(n_classes + 2),
    ]
    layers = [
        Parallel([encoder, []], shared_input=True),
        Parallel([decoder_from_z, decoder_from_words], shared_input=False),
        lambda x: T.concatenate(x, axis=2),
        LNLSTM(200 + n_classes + 2, lstm_size, name="declstm"),
        Linear(lstm_size, n_classes, name="classifier"),
        SoftMax(),
    ]
    model = LMReconstructionModel(layers, aux_loss=True, alpha=alpha)
    return model
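# Usage sketch (values are illustrative assumptions):
#
#     model = make_model(z=64, sample_size=64, dropword_p=0.5, n_classes=82,
#                        lstm_size=1000, alpha=0.2)
#
# sample_size should be divisible by 2**5 = 32 so the five stride-2
# convolutions and deconvolutions round-trip cleanly; alpha weights the
# auxiliary reconstruction loss collected via Store().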
def main(z, sample_size, p, encdec_layers, lstm_size, pad_string, mode, alpha):
    vocab = pickle.load(open("data/char_vocab.pkl"))
    train_db = LmReconstructionDatabase("train", batches_per_epoch=1000,
                                        sample_size=sample_size, random_samples=False)
    valid_db = LmReconstructionDatabase("valid", batches_per_epoch=100,
                                        sample_size=sample_size, random_samples=False)
    model = make_model(z, sample_size, p, train_db.n_classes, encdec_layers, lstm_size, alpha)
    name = "lm.charvae.z_%d.len_%d.layers_%d.p_%.2f.alpha_%.2f.lstmsz_%d" % \
        (z, sample_size, encdec_layers, p, alpha, lstm_size)
    model.load("exp/%s/model.flt" % name)
    model.set_phase(train=False)

    start_word = train_db.n_classes

    if mode == 'manifold':
        # Walk a 2D grid over the unit square and map it into latent space
        # through the Gaussian inverse CDF.
        assert z == 2
        steps = 10
        eps = 0.001
        x = numpy.linspace(eps, 1 - eps, num=steps)
        y = numpy.linspace(eps, 1 - eps, num=steps)
        n = steps ** 2
        xy = [(i, j) for i in x for j in y]
        xy = numpy.asarray(xy)
        sampled = norm.ppf(xy)
    elif mode == 'vary':
        # Encode one sentence, then sweep a single random latent dimension.
        dim = numpy.random.randint(z)
        print "dimension %d" % dim
        s = "<unk> caller to a local radio station said cocaine"
        s = to_inputs(s, vocab, sample_size)
        encoder = model.layers[0].branches[0]
        sampler = encoder[-1]
        assert isinstance(sampler, Sampler)
        ins = s[:, None]
        x = T.imatrix()
        encoder(x)  # build the graph so the sampler records its mean
        mu = sampler.mu
        f = theano.function([x], mu)
        mu_vals = f(ins.astype('int32'))
        s_z = mu_vals[0]
        n = 15
        eps = 0.001
        x = numpy.linspace(eps, 1 - eps, num=n)
        x = norm.ppf(x)
        sampled = numpy.repeat(s_z[None, :], n, axis=0)
        sampled[:, dim] = x
    elif mode == 'interpolate':
        # Encode two sentences and linearly interpolate between their latent means.
        s1 = "<unk> caller to a local radio station said cocaine"
        s2 = "giving up some of its gains as the dollar recovered"
        s1 = to_inputs(s1, vocab, sample_size)
        s2 = to_inputs(s2, vocab, sample_size)
        encoder = model.layers[0].branches[0]
        sampler = encoder[-1]
        assert isinstance(sampler, Sampler)
        ins = numpy.zeros((sample_size, 2))
        ins[:, 0] = s1
        ins[:, 1] = s2
        x = T.imatrix()
        encoder(x)  # build the graph so the sampler records its mean
        mu = sampler.mu
        f = theano.function([x], mu)
        mu_vals = f(ins.astype('int32'))
        s1_z = mu_vals[0]
        s2_z = mu_vals[1]
        n = 15
        s1_z = numpy.repeat(s1_z[None, :], n, axis=0)
        s2_z = numpy.repeat(s2_z[None, :], n, axis=0)
        steps = numpy.linspace(0, 1, n)[:, None]
        sampled = s1_z * (1 - steps) + s2_z * steps
    else:
        # Default: decode random draws from the prior.
        n = 100
        sampled = numpy.random.normal(0, 1, (n, z))

    start_words = numpy.ones(n) * start_word
    start_words = theano.shared(start_words.astype('int32'))
    sampled = theano.shared(sampled.astype(theano.config.floatX))
    decoder_from_z = model.layers[1].branches[0]
    from_z = decoder_from_z(sampled.astype(theano.config.floatX))

    # Greedy step-by-step decoding: feed the argmax of each step back in as
    # the next input character.
    layers = model.layers[-3:]
    layers[0] = LNLSTMStep(layers[0])
    step = Sequential(layers)
    onehot = OneHot(train_db.n_classes + 2)
    words = start_words
    generated = []
    for i in xrange(sample_size):
        ins = T.concatenate([from_z[i], onehot(words)], axis=1)
        pred = step(ins)
        words = T.argmax(pred, axis=1)
        generated.append(words[None, :])
    generated = T.concatenate(generated, axis=0)
    f = theano.function([], outputs=generated)
    w = f()

    if pad_string not in vocab.word_to_index:
        vocab.add(pad_string)
    else:
        raise Exception("%s is already in the vocabulary" % pad_string)

    results = []
    for i in xrange(w.shape[1]):
        s = [vocab.by_index(idx) for idx in w[:, i]]
        r = ''.join(s)
        print r
        results.append(r)

    if mode == 'manifold':
        # Print the grid of generated strings, each wrapped over `lines` rows.
        lines = 3
        steps = int(numpy.sqrt(n))
        for i in xrange(steps):
            for k in xrange(lines):
                for j in xrange(steps):
                    r = results[i * steps + j]
                    l = len(r) // lines
                    print r[k * l:(k + 1) * l], ' ',
                print
            print
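# Hypothetical invocation (argument values are assumptions; the checkpoint
# name is derived from them, so they must match a model trained under exp/):
#
#     main(z=2, sample_size=52, p=0.5, encdec_layers=2, lstm_size=1000,
#          pad_string='#', mode='manifold', alpha=0.2)
#
# mode='manifold' requires z == 2; 'vary' and 'interpolate' work for any z,
# and any other mode decodes 100 random draws from the prior.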
def make_model(z, sample_size, dropword_p, n_classes, encdec_layers,
               charcnn_size, charcnn_layers, alpha):
    # Hybrid VAE whose decoder is a gated highway CharCNN rather than an LSTM.
    assert sample_size % (2 ** encdec_layers) == 0
    if encdec_layers == 2:
        encoder = [
            OneHot(n_classes),
            LayoutRNNToCNN(),
            Convolution1d(3, 128, n_classes, pad=1, stride=2, causal=False, name="conv1"),
            BatchNormalization(128, name="bn1", collect=False),
            ReLU(),
            Convolution1d(3, 256, 128, pad=1, stride=2, causal=False, name="conv2"),
            BatchNormalization(256, name="bn2", collect=False),
            ReLU(),
            Flatten(),
            Linear(sample_size // 4 * 256, z * 2, name="fc_encode"),
            Sampler(z),
        ]
        decoder_from_z = [
            Linear(z, sample_size // 4 * 256, name="fc_decode"),
            ReLU(),
            Reshape((-1, 256, sample_size // 4, 1)),
            Deconvolution1D(256, 128, 3, pad=1, stride=2, name="deconv2"),
            BatchNormalization(128, name="deconv_bn2", collect=False),
            ReLU(),
            Deconvolution1D(128, 200, 3, pad=1, stride=2, name="deconv1"),
            BatchNormalization(200, name="deconv_bn1", collect=False),
            ReLU(),
            LayoutCNNToRNN(),
            # Auxiliary classifier head; Store() keeps its prediction for the
            # auxiliary loss while the empty branch passes features through.
            Parallel([
                [
                    Linear(200, n_classes, name="aux_classifier"),
                    SoftMax(),
                    Store(),
                ],
                [],
            ], shared_input=True),
            lambda x: x[1],
        ]
    elif encdec_layers == 3:
        encoder = [
            OneHot(n_classes),
            LayoutRNNToCNN(),
            Convolution1d(3, 128, n_classes, pad=1, stride=2, causal=False, name="conv1"),
            BatchNormalization(128, name="bn1"),
            ReLU(),
            Convolution1d(3, 256, 128, pad=1, stride=2, causal=False, name="conv2"),
            BatchNormalization(256, name="bn2"),
            ReLU(),
            Convolution1d(3, 512, 256, pad=1, stride=2, causal=False, name="conv3"),
            BatchNormalization(512, name="bn3"),
            ReLU(),
            Flatten(),
            Linear(sample_size // 8 * 512, z * 2, name="fc_encode"),
            Sampler(z),
        ]
        decoder_from_z = [
            Linear(z, sample_size // 8 * 512, name="fc_decode"),
            ReLU(),
            Reshape((-1, 512, sample_size // 8, 1)),
            Deconvolution1D(512, 256, 3, pad=1, stride=2, name="deconv3"),
            BatchNormalization(256, name="deconv_bn3", collect=False),
            ReLU(),
            Deconvolution1D(256, 128, 3, pad=1, stride=2, name="deconv2"),
            BatchNormalization(128, name="deconv_bn2", collect=False),
            ReLU(),
            Deconvolution1D(128, 200, 3, pad=1, stride=2, name="deconv1"),
            BatchNormalization(200, name="deconv_bn1", collect=False),
            ReLU(),
            LayoutCNNToRNN(),
            Parallel([
                [
                    Linear(200, n_classes, name="aux_classifier"),
                    SoftMax(),
                    Store(),
                ],
                [],
            ], shared_input=True),
            lambda x: x[1],
        ]
    else:
        raise Exception("unsupported number of encdec layers %d" % encdec_layers)

    start_word = n_classes
    dummy_word = n_classes + 1
    decoder_from_words = [
        Dropword(dropword_p, dummy_word=dummy_word),
        lambda x: T.concatenate([T.ones((1, x.shape[1]), dtype='int32') * start_word, x], axis=0),
        lambda x: x[:-1],
        OneHot(n_classes + 2),
    ]
    layers = [
        Parallel([encoder, []], shared_input=True),
        Parallel([decoder_from_z, decoder_from_words], shared_input=False),
        lambda x: T.concatenate(x, axis=2),
        LayoutRNNToCNN(),
        Convolution1d(1, charcnn_size * 2, 200 + n_classes + 2, name="decconvresize"),
        BatchNormalization(charcnn_size * 2, name="decbnresize"),
        Gated(),
    ]
    for i in range(charcnn_layers):
        layers.append(HighwayConvolution1d(3, charcnn_size, dilation=1,
                                           name="decconv%d" % i))
    layers.extend([
        LayoutCNNToRNN(),
        Linear(charcnn_size, n_classes, name="classifier"),
        SoftMax(),
    ])
    model = LMReconstructionModel(layers, aux_loss=True, alpha=alpha)
    return model
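# Usage sketch (values are illustrative assumptions):
#
#     model = make_model(z=64, sample_size=56, dropword_p=0.5, n_classes=82,
#                        encdec_layers=3, charcnn_size=512, charcnn_layers=4,
#                        alpha=0.2)
#
# sample_size must be divisible by 2**encdec_layers; this variant keeps the
# convolutional encoder and word-dropout input path of the LSTM model above
# but decodes with the gated highway CharCNN stack instead.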