import numpy
import theano
import theano.tensor as T

# Linear, Sequential, Dropout, Uniform, Sigmoid, Tanh and LayerNormalization
# come from this repo's `nn` package.


class LNGRU(object):

    def __init__(self, input_size, layer_size, batch_size=1, name="", p=0.0,
                 weight_init=Uniform(), inner_activation=Sigmoid(),
                 activation=Tanh(), persistent=False):
        self.activation = activation
        self.inner_activation = inner_activation
        self.layer_size = layer_size
        self.batch_size = batch_size
        self.persistent = persistent
        # Carried hidden state, used only when persistent=True.
        self.h = theano.shared(numpy.zeros((batch_size, layer_size), dtype=theano.config.floatX),
                               name=name + "_h_init")
        # Reset and update gates share one layer-normalized linear map.
        self.rz = Sequential([
            Linear(input_size + layer_size, layer_size * 2, weight_init=weight_init, name=name + "_r"),
            LayerNormalization(layer_size * 2, name=name + "_ln_r"),
            inner_activation
        ])
        # Candidate activation.
        self.g = Sequential([
            Linear(input_size + layer_size, layer_size, weight_init=weight_init, name=name + "_g"),
            LayerNormalization(layer_size, name=name + "_ln_g"),
            activation,
            Dropout(p)
        ])
        self.params = self.rz.params + self.g.params

    def step(self, x, h_tm1):
        rz_t = self.rz(T.concatenate([x, h_tm1], axis=1))
        r_t = rz_t[:, :self.layer_size]
        z_t = rz_t[:, self.layer_size:]
        g_t = self.g(T.concatenate([x, r_t * h_tm1], axis=1))
        h_t = (1 - z_t) * h_tm1 + z_t * g_t
        return h_t

    def __call__(self, x):
        # Start from the carried state when persistent, otherwise from zeros
        # (previously the carried state was written but never read back).
        h_init = self.h if self.persistent else T.zeros((x.shape[1], self.layer_size))
        h, upd = theano.scan(self.step, sequences=x, outputs_info=[h_init])
        self.updates = upd
        if self.persistent:
            self.updates[self.h] = h[-1]
        return h

    def set_phase(self, train):
        self.g.set_phase(train)

    def reset(self):
        if self.persistent:
            self.h.set_value(numpy.zeros((self.batch_size, self.layer_size),
                                         dtype=theano.config.floatX))
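
# Usage sketch (not from the source): driving an LNGRU over a
# (seq_len, batch, input_size) tensor. The names `rnn`, `X`, `step_fn`
# and all sizes are hypothetical.
rnn = LNGRU(input_size=64, layer_size=128, batch_size=16, name="gru", persistent=True)
X = T.tensor3('X')                    # (seq_len, batch, input_size)
H = rnn(X)                            # (seq_len, batch, layer_size)
# Compile with rnn.updates so the persistent hidden state carries across calls.
step_fn = theano.function([X], H, updates=rnn.updates)
out = step_fn(numpy.zeros((10, 16, 64), dtype=theano.config.floatX))
rnn.reset()                           # clear the carried state between sequences
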
def __init__(self, kernel_size, input_size, causal=True, dilation=1,
             weight_init=Uniform(), name="", keepdims=False, p=0.0):
    # kernel_size is fixed at 3; padding is tied to the dilation rate.
    assert kernel_size == 3
    self.conv = Sequential([
        Convolution1d(kernel_size, input_size * 3, input_size, pad=dilation,
                      causal=causal, dilation=dilation, weight_init=weight_init,
                      name=name, keepdims=keepdims),
        BatchNormalization(input_size * 3, name=name + "_bn"),
    ])
    self.dropout = Dropout(p)
    self.input_size = input_size
    self.params = self.conv.params

class LNRNN(object):

    def __init__(self, input_size, layer_size, batch_size, p=0.0, name="",
                 activation=T.tanh, weight_init=Uniform(), persistent=False):
        self.h = theano.shared(numpy.zeros((batch_size, layer_size), dtype=theano.config.floatX),
                               name=name + "_h_init")
        self.preact = Sequential([
            Linear(input_size + layer_size, layer_size, weight_init=weight_init, name=name + "_fc"),
            LayerNormalization(layer_size, name=name + "_ln"),
            activation,
            Dropout(p)
        ])
        self.params = self.preact.params
        self.activation = activation
        self.batch_size = batch_size
        self.layer_size = layer_size
        self.input_size = input_size
        self.persistent = persistent

    def __call__(self, x):
        h_init = self.h if self.persistent else T.zeros((x.shape[1], self.layer_size))
        h, upd = theano.scan(self.step, sequences=x, outputs_info=[h_init])
        if self.persistent:
            upd[self.h] = h[-1]
        self.updates = upd
        return h

    def step(self, x_t, h_tm1):
        h_t = self.preact(T.concatenate([x_t, h_tm1], axis=1))
        return h_t

    def set_phase(self, train):
        self.preact.set_phase(train)

    def reset(self):
        if self.persistent:
            self.h.set_value(numpy.zeros((self.batch_size, self.layer_size),
                                         dtype=theano.config.floatX))

def __init__(self, input_size, image_feature_size, layer_size, p=0.0, name="", steps=16):
    self.steps = steps
    self.lstm = LNLSTM(input_size, layer_size, name=name, p=p)
    # Maps image features to the LSTM's initial state (2 * layer_size wide).
    self.init = Sequential([
        Linear(image_feature_size, 2 * layer_size, name=name + "_init"),
        Tanh()
    ])
    self.params = self.init.params + self.lstm.params
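
# Sketch (assumption, not in this snippet): the 2 * layer_size init vector is
# presumably split into the LSTM's initial hidden and cell states, e.g.:
#
#     init = self.init(image_features)      # (batch, 2 * layer_size)
#     h0 = init[:, :layer_size]             # initial hidden state
#     c0 = init[:, layer_size:]             # initial cell state
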
def __init__(self, input_size, layer_size, batch_size=1, p=0.0, name="",
             activation=T.tanh, inner_activation=T.nnet.sigmoid,
             weight_init=Uniform(), persistent=False):
    self.h = theano.shared(numpy.zeros((batch_size, layer_size), dtype=theano.config.floatX),
                           name=name + "_h_init")
    self.c = theano.shared(numpy.zeros((batch_size, layer_size), dtype=theano.config.floatX),
                           name=name + "_c_init")
    # One fused layer-normalized projection for the i, f, o, g preactivations.
    self.preact = Sequential([
        Linear(input_size + layer_size, layer_size * 4, weight_init=weight_init, name=name + "_ifog"),
        LayerNormalization(layer_size * 4, name=name + "_ln")
    ])
    self.params = self.preact.params
    self.dropout = Dropout(p)
    self.updates = []
    self.activation = activation
    self.inner_activation = inner_activation
    self.batch_size = batch_size
    self.layer_size = layer_size
    self.persistent = persistent
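
# The step method is not part of this snippet. A sketch of the standard LSTM
# recurrence implied by the fused "_ifog" projection above (the i, f, o, g
# gate order is an assumption taken from the layer name):
def step(self, x_t, h_tm1, c_tm1):
    n = self.layer_size
    preact = self.preact(T.concatenate([x_t, h_tm1], axis=1))
    i_t = self.inner_activation(preact[:, :n])           # input gate
    f_t = self.inner_activation(preact[:, n:2 * n])      # forget gate
    o_t = self.inner_activation(preact[:, 2 * n:3 * n])  # output gate
    g_t = self.activation(preact[:, 3 * n:])             # candidate cell
    c_t = f_t * c_tm1 + i_t * g_t
    h_t = o_t * self.activation(c_t)
    return self.dropout(h_t), c_t
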
def __init__(self, kernel_size, kernel_number, input_size, causal=True, dilation=1,
             weight_init=Uniform(), name="", keepdims=False):
    assert kernel_number % 2 == 0
    assert kernel_size == 3
    self.conv = Sequential([
        Convolution1d(kernel_size, kernel_number, input_size, pad=dilation,
                      causal=causal, dilation=dilation, weight_init=weight_init,
                      name=name, keepdims=keepdims),
        BatchNormalization(kernel_number, collect=False, name=name + "_bn"),
        # Gated() halves the channel count, hence the kernel_number % 2 assertion.
        Gated(),
        # Integer division: kernel_number / 2 would yield a float on Python 3.
        Convolution1d(1, input_size, kernel_number // 2, pad=0, causal=causal,
                      keepdims=keepdims, weight_init=weight_init, name=name + "_1x1"),
    ])
    self.params = self.conv.params
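
# Gated() is not defined in this snippet. Presumably it is a WaveNet-style
# gated activation that splits the channels in half and multiplies a tanh
# branch by a sigmoid branch, which is consistent with the assertion above
# and with the 1x1 conv consuming kernel_number // 2 channels. A sketch,
# with the channel axis assumed to be axis 1:
class Gated(object):
    def __call__(self, x):
        half = x.shape[1] // 2
        return T.tanh(x[:, :half]) * T.nnet.sigmoid(x[:, half:])
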
n = 7  # an odd number, so there's one sample in the center
encoder = model.layers[0].branches[0]
sampler = encoder[-1]
start_words = numpy.ones(n) * start_word
start_words = theano.shared(start_words.astype('int32'))

# Decode from a symbolic latent matrix instead of a fixed shared sample.
decoder_from_z = model.layers[1].branches[0]
x = T.fmatrix('x')
from_z = decoder_from_z(x)

# Rewrap the decoder LSTM so it can be driven one step at a time.
layers = model.layers[-3:]
layers[0] = LNLSTMStep(layers[0])
step = Sequential(layers)
embed = model.layers[1].branches[1].layers[-1]

# Greedy decoding: feed the argmax of each step back in as the next input.
words = start_words
generated = []
for i in range(max_len):
    ins = T.concatenate([from_z[i], embed(words)], axis=1)
    pred = step(ins)
    words = T.argmax(pred, axis=1)
    generated.append(words[None, :])

def main(z, sample_size, p, encdec_layers, lstm_size, pad_string, mode, alpha):
    vocab = pickle.load(open("data/char_vocab.pkl", "rb"))
    train_db = LmReconstructionDatabase("train", batches_per_epoch=1000,
                                        sample_size=sample_size, random_samples=False)
    valid_db = LmReconstructionDatabase("valid", batches_per_epoch=100,
                                        sample_size=sample_size, random_samples=False)

    model = make_model(z, sample_size, p, train_db.n_classes, encdec_layers, lstm_size, alpha)
    name = "lm.charvae.z_%d.len_%d.layers_%d.p_%.2f.alpha_%.2f.lstmsz_%d" % \
        (z, sample_size, encdec_layers, p, alpha, lstm_size)
    model.load("exp/%s/model.flt" % name)
    model.set_phase(train=False)

    start_word = train_db.n_classes

    if mode == 'manifold':
        # Cover the 2-D latent prior with a grid of Gaussian quantiles.
        assert z == 2
        steps = 10
        eps = 0.001
        x = numpy.linspace(eps, 1 - eps, num=steps)
        y = numpy.linspace(eps, 1 - eps, num=steps)
        n = steps ** 2
        xy = [(i, j) for i in x for j in y]
        xy = numpy.asarray(xy)
        sampled = norm.ppf(xy)
    elif mode == 'vary':
        # Hold one encoding fixed and sweep a single latent dimension.
        dim = numpy.random.randint(z)
        print("dimension %d" % dim)
        s = "<unk> caller to a local radio station said cocaine"
        s = to_inputs(s, vocab, sample_size)
        encoder = model.layers[0].branches[0]
        sampler = encoder[-1]
        assert isinstance(sampler, Sampler)
        ins = s[:, None]
        x = T.imatrix()
        encoder(x)  # building the graph populates sampler.mu
        f = theano.function([x], sampler.mu)
        z_mu = f(ins.astype('int32'))
        s_z = z_mu[0]
        n = 15
        eps = 0.001
        x = numpy.linspace(eps, 1 - eps, num=n)
        x = norm.ppf(x)
        sampled = numpy.repeat(s_z[None, :], n, axis=0)
        sampled[:, dim] = x
    elif mode == 'interpolate':
        # Linear interpolation between the encodings of two sentences.
        s1 = "<unk> caller to a local radio station said cocaine"
        s2 = "giving up some of its gains as the dollar recovered"
        s1 = to_inputs(s1, vocab, sample_size)
        s2 = to_inputs(s2, vocab, sample_size)
        encoder = model.layers[0].branches[0]
        sampler = encoder[-1]
        assert isinstance(sampler, Sampler)
        ins = numpy.zeros((sample_size, 2))
        ins[:, 0] = s1
        ins[:, 1] = s2
        x = T.imatrix()
        encoder(x)  # building the graph populates sampler.mu
        f = theano.function([x], sampler.mu)
        z_mu = f(ins.astype('int32'))
        s1_z = z_mu[0]
        s2_z = z_mu[1]
        n = 15
        s1_z = numpy.repeat(s1_z[None, :], n, axis=0)
        s2_z = numpy.repeat(s2_z[None, :], n, axis=0)
        steps = numpy.linspace(0, 1, n)[:, None]
        sampled = s1_z * (1 - steps) + s2_z * steps
    else:
        n = 100
        sampled = numpy.random.normal(0, 1, (n, z))

    start_words = numpy.ones(n) * start_word
    start_words = theano.shared(start_words.astype('int32'))
    sampled = theano.shared(sampled.astype(theano.config.floatX))

    decoder_from_z = model.layers[1].branches[0]
    from_z = decoder_from_z(sampled.astype(theano.config.floatX))

    # Rewrap the decoder LSTM for step-by-step greedy decoding.
    layers = model.layers[-3:]
    layers[0] = LNLSTMStep(layers[0])
    step = Sequential(layers)
    onehot = OneHot(train_db.n_classes + 2)

    words = start_words
    generated = []
    for i in range(sample_size):
        ins = T.concatenate([from_z[i], onehot(words)], axis=1)
        pred = step(ins)
        words = T.argmax(pred, axis=1)
        generated.append(words[None, :])
    generated = T.concatenate(generated, axis=0)

    f = theano.function([], outputs=generated)
    w = f()

    if pad_string not in vocab.word_to_index:
        vocab.add(pad_string)
    else:
        raise Exception("%s is already in the vocabulary" % pad_string)

    results = []
    for i in range(w.shape[1]):
        s = [vocab.by_index(idx) for idx in w[:, i]]
        r = ''.join(s)
        print(r)
        results.append(r)

    if mode == 'manifold':
        # Print the grid of samples, wrapping each sample over several lines.
        lines = 3
        steps = int(numpy.sqrt(n))
        for i in range(steps):
            for k in range(lines):
                for j in range(steps):
                    r = results[i * steps + j]
                    l = len(r) // lines
                    print(r[k * l:(k + 1) * l], ' ', end='')
                print()
            print()

def main(z, sample_size, p, lstm_size, mode, alpha):
    vocab = Vocabulary()
    vocab.add('<pad>')
    vocab.add('<unk>')
    vocab.add('<end>')
    for i in range(256):
        vocab.add(chr(i))
    n_classes = len(vocab)

    model = make_model(z, sample_size, p, n_classes, lstm_size, alpha)
    name = "twittervae.charlevel.z_%d.len_%d.p_%.2f.lstmsz_%d.alpha_%.2f" % (
        z, sample_size, p, lstm_size, alpha)
    model.load("exp/%s/model.flt" % name)
    model.set_phase(train=False)

    start_word = n_classes

    if mode == 'vary':
        # One random latent, swept one dimension at a time: n samples per dimension.
        n = 7
        sampled = numpy.random.normal(0, 1, (1, z))
        sampled = numpy.repeat(sampled, n * z, axis=0)
        for dim in range(z):
            eps = 0.01
            x = numpy.linspace(eps, 1 - eps, num=n)
            x = norm.ppf(x)
            sampled[dim * n:(dim + 1) * n, dim] = x
        n *= z
    elif mode == 'interpolatereal':
        # Interpolate between the encodings of two real tweets.
        valid_db = TwitterReconstructionDatabase("valid", 50, batches_per_epoch=100,
                                                 max_len=sample_size)
        s1 = numpy.random.randint(0, len(valid_db.tweets))
        s2 = numpy.random.randint(0, len(valid_db.tweets))
        encoder = model.layers[0].branches[0]
        sampler = encoder[-1]
        assert isinstance(sampler, Sampler)
        ins = numpy.zeros((sample_size, 2))
        ins[:, 0] = valid_db.to_inputs(valid_db.tweets[s1])
        ins[:, 1] = valid_db.to_inputs(valid_db.tweets[s2])
        x = T.imatrix()
        encoder(x)  # building the graph populates sampler.mu
        f = theano.function([x], sampler.mu)
        z_mu = f(ins.astype('int32'))
        s1_z = z_mu[0]
        s2_z = z_mu[1]
        n = 7
        s1_z = numpy.repeat(s1_z[None, :], n, axis=0)
        s2_z = numpy.repeat(s2_z[None, :], n, axis=0)
        steps = numpy.linspace(0, 1, n)[:, None]
        sampled = s1_z * (1 - steps) + s2_z * steps
    elif mode == 'arithm':
        # Latent arithmetic: encode three tweets and decode s1 - s2 + s3.
        valid_db = TwitterReconstructionDatabase("valid", 50, batches_per_epoch=100,
                                                 max_len=sample_size)
        s1 = numpy.random.randint(0, len(valid_db.tweets))
        s2 = numpy.random.randint(0, len(valid_db.tweets))
        s3 = numpy.random.randint(0, len(valid_db.tweets))
        print(valid_db.tweets[s1])
        print(valid_db.tweets[s2])
        print(valid_db.tweets[s3])
        encoder = model.layers[0].branches[0]
        sampler = encoder[-1]
        assert isinstance(sampler, Sampler)
        ins = numpy.zeros((sample_size, 3))
        ins[:, 0] = valid_db.to_inputs(valid_db.tweets[s1])
        ins[:, 1] = valid_db.to_inputs(valid_db.tweets[s2])
        ins[:, 2] = valid_db.to_inputs(valid_db.tweets[s3])
        x = T.imatrix()
        encoder(x)  # building the graph populates sampler.mu
        f = theano.function([x], sampler.mu)
        z_mu = f(ins.astype('int32'))
        s1_z = z_mu[0]
        s2_z = z_mu[1]
        s3_z = z_mu[2]  # was z[1], which reused the second tweet's encoding
        n = 1
        sampled = s1_z - s2_z + s3_z
        sampled = sampled[None, :]
    elif mode == 'interpolate':
        # Interpolate between two random draws from the prior.
        z_prior = numpy.random.normal(0, 1, (2, z))
        s1_z = z_prior[0]
        s2_z = z_prior[1]
        n = 7
        s1_z = numpy.repeat(s1_z[None, :], n, axis=0)
        s2_z = numpy.repeat(s2_z[None, :], n, axis=0)
        steps = numpy.linspace(0, 1, n)[:, None]
        sampled = s1_z * (1 - steps) + s2_z * steps
    else:
        n = 100
        sampled = numpy.random.normal(0, 1, (n, z))

    start_words = numpy.ones(n) * start_word
    start_words = theano.shared(start_words.astype('int32'))
    sampled = theano.shared(sampled.astype(theano.config.floatX))

    decoder_from_z = model.layers[1].branches[0]
    from_z = decoder_from_z(sampled)

    # Rewrap the decoder LSTM for step-by-step greedy decoding.
    layers = model.layers[-3:]
    layers[0] = LNLSTMStep(layers[0])
    step = Sequential(layers)
    embed = model.layers[1].branches[1].layers[-1]

    words = start_words
    generated = []
    for i in range(sample_size):
        ins = T.concatenate([from_z[i], embed(words)], axis=1)
        pred = step(ins)
        words = T.argmax(pred, axis=1)
        generated.append(words[None, :])
    generated = T.concatenate(generated, axis=0)

    import time
    t = time.time()
    print("compiling...", end=' ')
    f = theano.function([], outputs=generated)
    print("done, took %f secs" % (time.time() - t))
    w = f()

    results = []
    pad = vocab.by_word("<pad>")
    end = vocab.by_word("<end>")
    for i in range(w.shape[1]):
        s = []
        for idx in w[:, i]:
            if idx == end or idx == pad:
                break
            s.append(vocab.by_index(idx))
        r = ''.join(s)
        if mode == "vary":
            # n was multiplied by z above, so each dimension owns n // z rows
            # (the original `i % n` only fired at i == 0).
            if i % (n // z) == 0:
                print("dimension %d" % (i // (n // z)))
        print(r.strip())
        results.append(r)