Example #1
class LNGRU(object):

    def __init__(self, input_size, layer_size, batch_size=1, name="", p=0.0, weight_init=Uniform(),
                 inner_activation=Sigmoid(), activation=Tanh(), persistent=False):
        self.activation = activation
        self.inner_activation = inner_activation
        self.layer_size = layer_size
        self.batch_size = batch_size
        self.persistent = persistent
        self.h = theano.shared(numpy.zeros((batch_size, layer_size), dtype=theano.config.floatX), name=name + "_h_init")

        self.rz = Sequential([
            Linear(input_size+layer_size, layer_size * 2, weight_init=weight_init, name=name+"_r"),
            LayerNormalization(layer_size * 2, name=name+"_ln_r"),
            inner_activation
        ])
        self.g = Sequential([
            Linear(input_size+layer_size, layer_size, weight_init=weight_init, name=name+"_g"),
            LayerNormalization(layer_size, name=name+"_ln_g"),
            activation,
            Dropout(p)
        ])

        self.params = self.rz.params + self.g.params

    def step(self, x, h_tm1):
        rz_t = self.rz(T.concatenate([x, h_tm1], axis=1))
        r_t = rz_t[:, :self.layer_size]
        z_t = rz_t[:, self.layer_size:]
        g_t = self.g(T.concatenate([x, r_t * h_tm1], axis=1))
        h_t = (1 - z_t) * h_tm1 + z_t * g_t
        return h_t

    def __call__(self, x):
        h_init = self.h if self.persistent else T.zeros((x.shape[1], self.layer_size))

        h, upd = theano.scan(self.step, sequences=x, outputs_info=[h_init])

        self.updates = upd
        if self.persistent:
            self.updates[self.h] = h[-1]

        return h

    def set_phase(self, train):
        self.g.set_phase(train)

    def reset(self):
        if self.persistent:
            self.h.set_value(numpy.zeros((self.batch_size, self.layer_size), dtype=theano.config.floatX))
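A minimal usage sketch of LNGRU (assuming the surrounding nn package that defines Sequential, Linear, LayerNormalization, Dropout and the weight/activation helpers is importable; shapes here are illustrative):

import numpy
import theano
import theano.tensor as T

x = T.tensor3('x')                        # (time, batch, input_size)
gru = LNGRU(input_size=32, layer_size=64, batch_size=8, name="gru")
h = gru(x)                                # (time, batch, layer_size)
f = theano.function([x], h[-1], updates=gru.updates)
last_h = f(numpy.zeros((10, 8, 32), dtype=theano.config.floatX))  # (8, 64)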
Example #2
    def __init__(self,
                 kernel_size,
                 input_size,
                 causal=True,
                 dilation=1,
                 weight_init=Uniform(),
                 name="",
                 keepdims=False,
                 p=0.0):
        from nn.normalization import LayerNormalization

        assert kernel_size == 3

        self.conv = Sequential([
            Convolution1d(kernel_size,
                          input_size * 3,
                          input_size,
                          pad=dilation,
                          causal=causal,
                          dilation=dilation,
                          weight_init=weight_init,
                          name=name,
                          keepdims=keepdims),
            BatchNormalization(input_size * 3, name=name + "_bn"),
        ])
        self.dropout = Dropout(p)
        self.input_size = input_size
        self.params = self.conv.params
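Only __init__ appears in this snippet; the forward pass is not shown. A hypothetical application of the two members built above (the actual class may add a residual connection or other wiring) could be:

    # Hypothetical forward pass, not from the source:
    # convolution + batch norm, then dropout on the result.
    def __call__(self, x):
        return self.dropout(self.conv(x))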
Example #3
    def __init__(self, input_size, layer_size, batch_size, p=0.0,
                 name="", activation=T.tanh, weight_init=Uniform(), persistent=False):
        self.h = theano.shared(numpy.zeros((batch_size, layer_size), dtype=theano.config.floatX), name=name+"_h_init")

        self.preact = Sequential([
            Linear(input_size+layer_size, layer_size, weight_init=weight_init, name=name+"_fc"),
            LayerNormalization(layer_size, name=name+"_ln"),
            activation,
            Dropout(p)
        ])
        self.params = self.preact.params

        self.activation = activation
        self.batch_size = batch_size
        self.layer_size = layer_size
        self.input_size = input_size
        self.persistent = persistent
Example #4
    def __init__(self, input_size, layer_size, batch_size=1, name="", p=0.0, weight_init=Uniform(),
                 inner_activation=Sigmoid(), activation=Tanh(), persistent=False):
        self.activation = activation
        self.inner_activation = inner_activation
        self.layer_size = layer_size
        self.batch_size = batch_size
        self.persistent = persistent
        self.h = theano.shared(numpy.zeros((batch_size, layer_size), dtype=theano.config.floatX), name=name + "_h_init")

        self.rz = Sequential([
            Linear(input_size+layer_size, layer_size * 2, weight_init=weight_init, name=name+"_r"),
            LayerNormalization(layer_size * 2, name=name+"_ln_r"),
            inner_activation
        ])
        self.g = Sequential([
            Linear(input_size+layer_size, layer_size, weight_init=weight_init, name=name+"_g"),
            LayerNormalization(layer_size, name=name+"_ln_g"),
            activation,
            Dropout(p)
        ])

        self.params = self.rz.params + self.g.params
Example #5
class LNRNN(object):
    def __init__(self, input_size, layer_size, batch_size, p=0.0,
                 name="", activation=T.tanh, weight_init=Uniform(), persistent=False):
        self.h = theano.shared(numpy.zeros((batch_size, layer_size), dtype=theano.config.floatX), name=name+"_h_init")

        self.preact = Sequential([
            Linear(input_size+layer_size, layer_size, weight_init=weight_init, name=name+"_fc"),
            LayerNormalization(layer_size, name=name+"_ln"),
            activation,
            Dropout(p)
        ])
        self.params = self.preact.params

        self.activation = activation
        self.batch_size = batch_size
        self.layer_size = layer_size
        self.input_size = input_size
        self.persistent = persistent

    def __call__(self, x):
        h_init = self.h if self.persistent else T.zeros((x.shape[1], self.layer_size))
        h, upd = theano.scan(self.step, sequences=x, outputs_info=[h_init])
        if self.persistent:
            upd[self.h] = h[-1]

        self.updates = upd

        return h

    def step(self, x_t, h_tm1):
        h_t = self.preact(T.concatenate([x_t, h_tm1], axis=1))
        return h_t

    def set_phase(self, train):
        self.preact.set_phase(train)

    def reset(self):
        if self.persistent:
            self.h.set_value(numpy.zeros((self.batch_size, self.layer_size), dtype=theano.config.floatX))
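Written as a recurrence, with $\mathrm{LN}$ denoting layer normalization and $f$ the activation, LNRNN computes

$$h_t = f\big(\mathrm{LN}(W\,[x_t;\,h_{t-1}] + b)\big),$$

with dropout applied to $h_t$ while in the training phase.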
Example #6
    def __init__(self,
                 input_size,
                 image_feature_size,
                 layer_size,
                 p=0.0,
                 name="",
                 steps=16):
        self.steps = steps
        self.lstm = LNLSTM(input_size, layer_size, name=name, p=p)
        self.init = Sequential([
            Linear(image_feature_size, 2 * layer_size, name=name + "_init"),
            Tanh()
        ])
        self.params = self.init.params + self.lstm.params
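Only the constructor is shown. Since self.init projects the image features to 2 * layer_size units, it presumably supplies the LSTM's initial hidden and cell states; a hypothetical sketch of that wiring:

    # Hypothetical, not from the source: split the Tanh-squashed projection
    # into the LSTM's initial hidden state and cell state.
    def initial_state(self, image_features):          # (batch, image_feature_size)
        hc = self.init(image_features)                 # (batch, 2 * layer_size)
        h0 = hc[:, :self.lstm.layer_size]
        c0 = hc[:, self.lstm.layer_size:]
        return h0, c0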
Example #7
    def __init__(self, input_size, layer_size, batch_size=1, p=0.0,
                 name="", activation=T.tanh, inner_activation=T.nnet.sigmoid, weight_init=Uniform(), persistent=False):

        self.h = theano.shared(numpy.zeros((batch_size, layer_size), dtype=theano.config.floatX), name=name+"_h_init")
        self.c = theano.shared(numpy.zeros((batch_size, layer_size), dtype=theano.config.floatX), name=name+"_c_init")

        self.preact = Sequential([
            Linear(input_size+layer_size, layer_size * 4, weight_init=weight_init, name=name+"_ifog"),
            LayerNormalization(layer_size * 4, name=name + "_ln")
        ])
        self.params = self.preact.params

        self.dropout = Dropout(p)

        self.updates = []
        self.activation = activation
        self.inner_activation = inner_activation
        self.batch_size = batch_size
        self.layer_size = layer_size
        self.persistent = persistent
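The step function is not part of this snippet. A standard layer-normalized LSTM step consistent with the 4 * layer_size "_ifog" pre-activation above would look roughly like this (a sketch, not the source's exact code):

    # Sketch: slice the joint pre-activation into input, forget and output
    # gates plus the candidate, then apply the usual LSTM state update.
    def step(self, x_t, h_tm1, c_tm1):
        preact = self.preact(T.concatenate([x_t, h_tm1], axis=1))
        i_t = self.inner_activation(preact[:, 0 * self.layer_size:1 * self.layer_size])
        f_t = self.inner_activation(preact[:, 1 * self.layer_size:2 * self.layer_size])
        o_t = self.inner_activation(preact[:, 2 * self.layer_size:3 * self.layer_size])
        g_t = self.activation(preact[:, 3 * self.layer_size:])
        c_t = f_t * c_tm1 + i_t * g_t
        h_t = o_t * self.activation(c_t)
        return h_t, c_t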
Example #8
    def __init__(self,
                 kernel_size,
                 kernel_number,
                 input_size,
                 causal=True,
                 dilation=1,
                 weight_init=Uniform(),
                 name="",
                 keepdims=False):

        assert kernel_number % 2 == 0
        assert kernel_size == 3

        self.conv = Sequential([
            Convolution1d(kernel_size,
                          kernel_number,
                          input_size,
                          pad=dilation,
                          causal=causal,
                          dilation=dilation,
                          weight_init=weight_init,
                          name=name,
                          keepdims=keepdims),
            BatchNormalization(kernel_number, collect=False,
                               name=name + "_bn"),
            Gated(),
            Convolution1d(1,
                          input_size,
                          kernel_number // 2,  # Gated() halves the channel count
                          pad=0,
                          causal=causal,
                          keepdims=keepdims,
                          weight_init=weight_init,
                          name=name + "_1x1"),
        ])
        self.params = self.conv.params
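Gated() is presumably a gated activation that splits its input channels in half and uses one half to gate the other; that would explain both the assert kernel_number % 2 == 0 and the kernel_number // 2 input channels of the 1x1 convolution. An assumed sketch of such a unit (the channel axis is taken to be 1):

# Assumption about what Gated() computes, not its actual source:
def gated(x):
    half = x.shape[1] // 2
    return T.tanh(x[:, :half]) * T.nnet.sigmoid(x[:, half:])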
Example #9
n = 7  # an odd number, so there's one in the center!

encoder = model.layers[0].branches[0]
sampler = encoder[-1]

start_words = numpy.ones(n) * start_word
start_words = theano.shared(start_words.astype('int32'))

decoder_from_z = model.layers[1].branches[0]
x = T.fmatrix('x')
from_z = decoder_from_z(x)

layers = model.layers[-3:]
layers[0] = LNLSTMStep(layers[0])
step = Sequential(layers)
embed = model.layers[1].branches[1].layers[-1]


words = start_words
generated = []

for i in range(max_len):
    ins = T.concatenate([from_z[i], embed(words)], axis=1)
    pred = step(ins)
    words = T.argmax(pred, axis=1)
    generated.append(words[None, :])
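Following the pattern of the fuller script in Example #10 below, the per-step predictions would then be concatenated and compiled into a single sampling function (a sketch grounded in that example):

generated = T.concatenate(generated, axis=0)
f = theano.function([], outputs=generated)
w = f()  # (max_len, n) array of generated symbol indices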
Example #10
def main(z, sample_size, p, encdec_layers, lstm_size, pad_string, mode, alpha):
    vocab = pickle.load(open("data/char_vocab.pkl", "rb"))

    train_db = LmReconstructionDatabase("train",
                                        batches_per_epoch=1000,
                                        sample_size=sample_size,
                                        random_samples=False)
    valid_db = LmReconstructionDatabase("valid",
                                        batches_per_epoch=100,
                                        sample_size=sample_size,
                                        random_samples=False)

    model = make_model(z, sample_size, p, train_db.n_classes, encdec_layers,
                       lstm_size, alpha)
    name = "lm.charvae.z_%d.len_%d.layers_%d.p_%.2f.alpha_%.2f.lstmsz_%d" % \
           (z, sample_size, encdec_layers, p, alpha, lstm_size)
    model.load("exp/%s/model.flt" % name)
    model.set_phase(train=False)

    start_word = train_db.n_classes

    if mode == 'manifold':
        assert z == 2
        steps = 10
        eps = 0.001
        x = numpy.linspace(eps, 1 - eps, num=steps)
        y = numpy.linspace(eps, 1 - eps, num=steps)
        n = steps**2
        xy = [(i, j) for i in x for j in y]
        xy = numpy.asarray(xy)
        sampled = norm.ppf(xy)
    elif mode == 'vary':
        dim = numpy.random.randint(z)
        print "dimension %d" % dim
        s = "<unk> caller to a local radio station said cocaine"
        s = to_inputs(s, vocab, sample_size)
        encoder = model.layers[0].branches[0]
        sampler = encoder[-1]
        assert isinstance(sampler, Sampler)
        ins = s[:, None]
        x = T.imatrix()
        z = encoder(x)
        mu = sampler.mu
        f = theano.function([x], mu)
        z = f(ins.astype('int32'))
        s_z = z[0]
        n = 15
        eps = 0.001
        x = numpy.linspace(eps, 1 - eps, num=n)
        x = norm.ppf(x)
        sampled = numpy.repeat(s_z[None, :], n, axis=0)
        sampled[:, dim] = x
    elif mode == 'interpolate':
        s1 = "<unk> caller to a local radio station said cocaine"
        s2 = "giving up some of its gains as the dollar recovered"
        s1 = to_inputs(s1, vocab, sample_size)
        s2 = to_inputs(s2, vocab, sample_size)
        encoder = model.layers[0].branches[0]
        sampler = encoder[-1]
        assert isinstance(sampler, Sampler)
        ins = numpy.zeros((sample_size, 2))
        ins[:, 0] = s1
        ins[:, 1] = s2
        x = T.imatrix()
        z = encoder(x)
        mu = sampler.mu
        f = theano.function([x], mu)
        z = f(ins.astype('int32'))
        s1_z = z[0]
        s2_z = z[1]
        n = 15
        s1_z = numpy.repeat(s1_z[None, :], n, axis=0)
        s2_z = numpy.repeat(s2_z[None, :], n, axis=0)
        steps = numpy.linspace(0, 1, n)[:, None]
        sampled = s1_z * (1 - steps) + s2_z * steps
    else:
        n = 100
        sampled = numpy.random.normal(0, 1, (n, z))

    start_words = numpy.ones(n) * start_word
    start_words = theano.shared(start_words.astype('int32'))
    sampled = theano.shared(sampled.astype(theano.config.floatX))

    decoder_from_z = model.layers[1].branches[0]
    from_z = decoder_from_z(sampled.astype(theano.config.floatX))

    layers = model.layers[-3:]
    layers[0] = LNLSTMStep(layers[0])
    step = Sequential(layers)
    onehot = OneHot(train_db.n_classes + 2)

    words = start_words
    generated = []
    for i in range(sample_size):
        ins = T.concatenate([from_z[i], onehot(words)], axis=1)
        pred = step(ins)
        words = T.argmax(pred, axis=1)
        generated.append(words[None, :])

    generated = T.concatenate(generated, axis=0)
    f = theano.function([], outputs=generated)
    w = f()

    if pad_string not in vocab.word_to_index:
        vocab.add(pad_string)
    else:
        raise Exception("%s is already in the vocabulary" % pad_string)

    results = []

    for i in range(w.shape[1]):
        s = [vocab.by_index(idx) for idx in w[:, i]]
        r = ''.join(s)
        print(r)
        results.append(r)

    if mode == 'manifold':
        lines = 3
        steps = int(numpy.sqrt(n))
        for i in range(steps):
            for k in range(lines):
                for j in range(steps):
                    r = results[i * steps + j]
                    l = len(r) // lines
                    print(r[k * l:(k + 1) * l], '  ', end='')
                print()
            print()
Example #11
def main(z, sample_size, p, lstm_size, mode, alpha):
    vocab = Vocabulary()
    vocab.add('<pad>')
    vocab.add('<unk>')
    vocab.add('<end>')
    for i in range(256):
        ch = chr(i)
        vocab.add(ch)
    n_classes = len(vocab)

    model = make_model(z, sample_size, p, n_classes, lstm_size, alpha)
    name = "twittervae.charlevel.z_%d.len_%d.p_%.2f.lstmsz_%d.alpha_%.2f" % (
        z, sample_size, p, lstm_size, alpha)
    model.load("exp/%s/model.flt" % name)
    model.set_phase(train=False)

    start_word = n_classes

    if mode == 'vary':
        n = 7
        sampled = numpy.random.normal(0, 1, (1, z))
        sampled = numpy.repeat(sampled, n * z, axis=0)
        for dim in range(z):
            eps = 0.01
            x = numpy.linspace(eps, 1 - eps, num=n)
            x = norm.ppf(x)
            sampled[dim * n:(dim + 1) * n, dim] = x
        n *= z
    elif mode == 'interpolatereal':
        valid_db = TwitterReconstructionDatabase("valid",
                                                 50,
                                                 batches_per_epoch=100,
                                                 max_len=sample_size)
        s1 = numpy.random.randint(0, len(valid_db.tweets))
        s2 = numpy.random.randint(0, len(valid_db.tweets))
        encoder = model.layers[0].branches[0]
        sampler = encoder[-1]
        assert isinstance(sampler, Sampler)
        ins = numpy.zeros((sample_size, 2))
        ins[:, 0] = valid_db.to_inputs(valid_db.tweets[s1])
        ins[:, 1] = valid_db.to_inputs(valid_db.tweets[s2])
        x = T.imatrix()
        z = encoder(x)
        mu = sampler.mu
        f = theano.function([x], mu)
        z = f(ins.astype('int32'))
        s1_z = z[0]
        s2_z = z[1]
        n = 7
        s1_z = numpy.repeat(s1_z[None, :], n, axis=0)
        s2_z = numpy.repeat(s2_z[None, :], n, axis=0)
        steps = numpy.linspace(0, 1, n)[:, None]
        sampled = s1_z * (1 - steps) + s2_z * steps
    elif mode == 'arithm':
        valid_db = TwitterReconstructionDatabase("valid",
                                                 50,
                                                 batches_per_epoch=100,
                                                 max_len=sample_size)
        s1 = numpy.random.randint(0, len(valid_db.tweets))
        s2 = numpy.random.randint(0, len(valid_db.tweets))
        s3 = numpy.random.randint(0, len(valid_db.tweets))
        print(valid_db.tweets[s1])
        print(valid_db.tweets[s2])
        print(valid_db.tweets[s3])
        encoder = model.layers[0].branches[0]
        sampler = encoder[-1]
        assert isinstance(sampler, Sampler)
        ins = numpy.zeros((sample_size, 3))
        ins[:, 0] = valid_db.to_inputs(valid_db.tweets[s1])
        ins[:, 1] = valid_db.to_inputs(valid_db.tweets[s2])
        ins[:, 2] = valid_db.to_inputs(valid_db.tweets[s3])
        x = T.imatrix()
        z = encoder(x)
        mu = sampler.mu
        f = theano.function([x], mu)
        z = f(ins.astype('int32'))
        s1_z = z[0]
        s2_z = z[1]
        s3_z = z[2]
        n = 1
        sampled = s1_z - s2_z + s3_z
        sampled = sampled[None, :]
    elif mode == 'interpolate':
        z = numpy.random.normal(0, 1, (2, z))
        s1_z = z[0]
        s2_z = z[1]
        n = 7
        s1_z = numpy.repeat(s1_z[None, :], n, axis=0)
        s2_z = numpy.repeat(s2_z[None, :], n, axis=0)
        steps = numpy.linspace(0, 1, n)[:, None]
        sampled = s1_z * (1 - steps) + s2_z * steps
    else:
        n = 100
        sampled = numpy.random.normal(0, 1, (n, z))

    start_words = numpy.ones(n) * start_word
    start_words = theano.shared(start_words.astype('int32'))
    sampled = theano.shared(sampled.astype(theano.config.floatX))

    decoder_from_z = model.layers[1].branches[0]
    from_z = decoder_from_z(sampled)

    layers = model.layers[-3:]
    layers[0] = LNLSTMStep(layers[0])
    step = Sequential(layers)
    embed = model.layers[1].branches[1].layers[-1]

    words = start_words
    generated = []
    for i in range(sample_size):
        ins = T.concatenate([from_z[i], embed(words)], axis=1)
        pred = step(ins)
        words = T.argmax(pred, axis=1)
        generated.append(words[None, :])

    generated = T.concatenate(generated, axis=0)
    import time
    t = time.time()
    print("compiling...", end=' ')
    f = theano.function([], outputs=generated)
    print("done, took %f secs" % (time.time() - t))
    w = f()

    results = []

    pad = vocab.by_word("<pad>")
    end = vocab.by_word("<end>")
    for i in range(w.shape[1]):
        s = []
        for idx in w[:, i]:
            if idx == end:
                break
            if idx == pad:
                break
            s.append(vocab.by_index(idx))
        r = ''.join(s)
        if mode == "vary":
            if i % n == 0:
                print("dimension %d" % (i / n))
        print(r.strip())
        results.append(r)