# Assumed module-level imports, following the identifiers used below:
#   import numpy as np; import theano; import theano.tensor as T
#   import feedforward
#   from theano_toolkit import utils as U
def build(P, input_size, hidden_sizes):
    # Dropout flag shared with the closures below. The original used a
    # `global` declaration, which only works by accident through the module
    # namespace; a closure cell makes the intent explicit.
    state = {'test_time': False}

    def activation(X):
        if not state['test_time']:
            # Training: dropout with p=0.5 on ReLU activations.
            mask = U.theano_rng.binomial(size=X.shape, p=0.5)
            return T.switch(mask, T.nnet.relu(X), 0)
        else:
            # Test: scale by the keep probability instead of sampling.
            return 0.5 * T.nnet.relu(X)

    classifier = feedforward.build_classifier(
        P, name="classifier",
        input_sizes=[input_size],
        hidden_sizes=hidden_sizes,
        output_size=1,
        initial_weights=feedforward.relu_init,
        output_initial_weights=lambda x, y: np.zeros((x, y)),
        activation=activation,
        output_activation=T.nnet.sigmoid)

    def predict(X, test=False):
        state['test_time'] = test
        return classifier([X])[:, 0]

    return predict
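# Usage sketch (hypothetical, not part of the original module): builds the
# dropout classifier and compiles a test-time predictor. Assumes the
# `Parameters` container from theano_toolkit, matching the P / U.theano_rng
# conventions in this file.
import theano
import theano.tensor as T
from theano_toolkit.parameters import Parameters

def _demo_dropout_classifier():
    P = Parameters()
    predict = build(P, input_size=784, hidden_sizes=[512, 512])
    X = T.matrix('X')
    train_prob = predict(X)            # stochastic: dropout masks sampled
    test_prob = predict(X, test=True)  # deterministic: activations halved
    return theano.function([X], test_prob)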
def build_decoder(P, latent_size, hidden_size, output_size):
    decode_ = feedforward.build_classifier(
        P, name='decoder',
        input_sizes=[latent_size],
        hidden_sizes=[hidden_size],
        output_size=output_size,
        initial_weights=feedforward.relu_init,
        activation=T.nnet.softplus,
        output_activation=T.nnet.sigmoid)

    def decode(X):
        # build_classifier returns (hiddens, outputs); keep the outputs.
        return decode_([X])[1]

    return decode
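# Usage sketch (hypothetical): decoding a batch of latent vectors through the
# decoder built above; the sizes are illustrative only.
import theano
import theano.tensor as T
from theano_toolkit.parameters import Parameters

def _demo_decoder():
    P = Parameters()
    decode = build_decoder(P, latent_size=32, hidden_size=256,
                           output_size=784)
    Z = T.matrix('Z')      # batch_size x latent_size
    X_recon = decode(Z)    # sigmoid outputs in (0, 1)
    return theano.function([Z], X_recon)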
def build(P, structure, weights_file, training=True):
    input_size = structure[0]
    layer_sizes = structure[1:-1]
    output_size = structure[-1]

    classifier = feedforward.build_classifier(
        P, "classifier",
        [input_size], layer_sizes, output_size,
        activation=T.nnet.sigmoid)

    def predict(X):
        hiddens, outputs = classifier([X])
        return hiddens, outputs

    if weights_file != "":
        P.load(weights_file)

    return predict
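# Usage sketch (hypothetical): `structure` packs input, hidden, and output
# sizes into one list, and an empty weights_file skips loading.
import theano
import theano.tensor as T
from theano_toolkit.parameters import Parameters

def _demo_structured_classifier():
    P = Parameters()
    predict = build(P, structure=[784, 256, 256, 10], weights_file="")
    X = T.matrix('X')
    hiddens, outputs = predict(X)
    return theano.function([X], outputs)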
def build(P, name,
          input_size=200, z_size=200,
          hidden_layer_size=2500,
          x_extractor_layers=[600] * 4,
          z_extractor_layers=[500] * 4,
          prior_layers=[500] * 4,
          generation_layers=[600] * 4,
          inference_layers=[500] * 4):

    def weight_init(x, y):
        return np.random.uniform(-0.08, 0.08, (x, y))

    X_extractor = feedforward.build_classifier(
        P, "x_extractor",
        input_sizes=[input_size],
        hidden_sizes=x_extractor_layers[:-1],
        output_size=x_extractor_layers[-1],
        initial_weights=weight_init,
        output_initial_weights=weight_init,
        activation=T.nnet.relu,
        output_activation=T.nnet.relu)

    Z_extractor = feedforward.build_classifier(
        P, "z_extractor",
        input_sizes=[z_size],
        hidden_sizes=z_extractor_layers[:-1],
        output_size=z_extractor_layers[-1],
        initial_weights=weight_init,
        output_initial_weights=weight_init,
        activation=T.nnet.relu,
        output_activation=T.nnet.relu)

    prior = vae.build_inferer(
        P, "prior",
        input_sizes=[hidden_layer_size],
        hidden_sizes=prior_layers,
        output_size=z_size,
        initial_weights=weight_init,
        activation=T.nnet.relu,
        initialise_outputs=False)

    generate = vae.build_inferer(
        P, "generator",
        input_sizes=[hidden_layer_size, z_extractor_layers[-1]],
        hidden_sizes=generation_layers,
        output_size=input_size,
        initial_weights=weight_init,
        activation=T.nnet.relu,
        initialise_outputs=False)

    P.init_recurrence_hidden = np.zeros((hidden_layer_size,))
    P.init_recurrence_cell = np.zeros((hidden_layer_size,))

    recurrence = lstm.build_step(
        P, "recurrence",
        input_sizes=[x_extractor_layers[-1], z_extractor_layers[-1]],
        hidden_size=hidden_layer_size)

    # NOTE: inference_layers is unused; the inference network reuses
    # generation_layers for its hidden sizes, as in the original.
    infer = vae.build_inferer(
        P, "infer",
        input_sizes=[hidden_layer_size, x_extractor_layers[-1]],
        hidden_sizes=generation_layers,
        output_size=z_size,
        initial_weights=weight_init,
        activation=T.nnet.relu,
        initialise_outputs=False)

    def sample():
        init_hidden = T.tanh(P.init_recurrence_hidden)
        init_cell = P.init_recurrence_cell
        init_hidden_batch = T.alloc(init_hidden, 1, hidden_layer_size)
        init_cell_batch = T.alloc(init_cell, 1, hidden_layer_size)
        noise = U.theano_rng.normal(size=(40, 1, z_size))

        def _step(eps, prev_cell, prev_hidden):
            # Draw z from the prior, generate x, then advance the recurrence.
            _, z_prior_mean, z_prior_logvar = prior([prev_hidden])
            z_sample = z_prior_mean + eps * T.exp(0.5 * z_prior_logvar)
            z_feat = Z_extractor([z_sample])
            _, x_mean, _ = generate([prev_hidden, z_feat])
            x_feat = X_extractor([x_mean])
            curr_cell, curr_hidden = recurrence(x_feat, z_feat,
                                                prev_cell, prev_hidden)
            return curr_cell, curr_hidden, x_mean

        [cells, hiddens, x_means], _ = theano.scan(
            _step,
            sequences=[noise],
            outputs_info=[init_cell_batch, init_hidden_batch, None])
        return x_means

    def extract(X, l):
        init_hidden = T.tanh(P.init_recurrence_hidden)
        init_cell = P.init_recurrence_cell
        init_hidden_batch = T.alloc(init_hidden, X.shape[1],
                                    hidden_layer_size)
        init_cell_batch = T.alloc(init_cell, X.shape[1], hidden_layer_size)
        noise = U.theano_rng.normal(size=(X.shape[0], X.shape[1], z_size))
        # With p=0.00 the reset mask never fires; kept as written.
        reset_init_mask = U.theano_rng.binomial(
            size=(X.shape[0], X.shape[1]), p=0.00)
        X_feat = X_extractor([X])

        def _step(t, x_feat, eps, reset_mask, prev_cell, prev_hidden):
            reset_mask = reset_mask.dimshuffle(0, 'x')
            _, z_prior_mean, z_prior_logvar = prior([prev_hidden])
            _, z_mean, z_logvar = infer([prev_hidden, x_feat])
            z_sample = z_mean + eps * T.exp(0.5 * z_logvar)
            z_feat = Z_extractor([z_sample])
            _, x_mean, x_logvar = generate([prev_hidden, z_feat])
            curr_cell, curr_hidden = recurrence(x_feat, z_feat,
                                                prev_cell, prev_hidden)
            curr_cell = T.switch(reset_mask, init_cell_batch, curr_cell)
            curr_hidden = T.switch(reset_mask, init_hidden_batch, curr_hidden)
            # Zero out all outputs past each sequence's length l.
            mask = (t < l).dimshuffle(0, 'x')
            return tuple(
                T.switch(mask, out, 0)
                for out in (curr_cell, curr_hidden,
                            z_prior_mean, z_prior_logvar,
                            z_sample, z_mean, z_logvar,
                            x_mean, x_logvar))

        [_, _,
         Z_prior_mean, Z_prior_logvar,
         Z_sample, Z_mean, Z_logvar,
         X_mean, X_logvar], _ = theano.scan(
            _step,
            sequences=[T.arange(X_feat.shape[0]),
                       X_feat, noise, reset_init_mask],
            outputs_info=[init_cell_batch, init_hidden_batch] + [None] * 7)
        return [Z_prior_mean, Z_prior_logvar,
                Z_mean, Z_logvar,
                X_mean, X_logvar]

    return extract, sample
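# Usage sketch (hypothetical): wiring the extract/sample pair above into
# compilable functions. X is laid out time-major (time x batch x input),
# matching the theano.scan calls, and l holds per-sequence lengths.
import theano
import theano.tensor as T
from theano_toolkit.parameters import Parameters

def _demo_vrnn():
    P = Parameters()
    extract, sample = build(P, "vrnn", input_size=200, z_size=200)
    X = T.tensor3('X')   # time x batch x input_size
    l = T.ivector('l')   # per-sequence lengths
    Z_prior_mean, Z_prior_logvar, Z_mean, Z_logvar, X_mean, X_logvar = \
        extract(X, l)
    encode = theano.function([X, l], [Z_mean, Z_logvar])
    generate_fn = theano.function([], sample())  # 40 sampled time steps
    return encode, generate_fn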
def build(P, input_size, output_size, mem_size, mem_width, controller_size):
    head_count = 1

    P.memory_init = np.random.randn(mem_size, mem_width)
    weight_init_params = []
    for i in xrange(head_count):
        P['read_weight_init_%d' % i] = 0.01 * np.random.randn(mem_size)
        P['write_weight_init_%d' % i] = 0.01 * np.random.randn(mem_size)
        weight_init_params.append((P['read_weight_init_%d' % i],
                                   P['write_weight_init_%d' % i]))

    heads_size, head_activations = head.build(
        head_count=head_count,
        mem_width=mem_width,
        shift_width=3)
    print "Size of heads:", heads_size

    def controller_activation(X):
        # Split controller output into head parameters and external output.
        return (head_activations(X[:, :heads_size]), X[:, heads_size:])

    def output_inits(ins, outs):
        init = feedforward.initial_weights(ins, outs)
        init[:, heads_size:] = 0
        return init

    controller = feedforward.build_classifier(
        P, "controller",
        input_sizes=[input_size, mem_width],
        hidden_sizes=[controller_size],
        output_size=heads_size + output_size,
        activation=T.tanh,
        output_activation=controller_activation,
        output_initial_weights=output_inits)

    ntm_step = ntm.build(mem_size, mem_width)

    def process(X):
        # X: batch_size x sequence_length x input_size
        memory_init = P.memory_init / T.sqrt(
            T.sum(T.sqr(P.memory_init), axis=1, keepdims=True))
        batch_size = X.shape[0]
        batch_size.name = 'batch_size'
        ones = T.ones_like(X[:, 0, 0])

        batch_memory_init = T.alloc(memory_init, batch_size,
                                    mem_size, mem_width)
        batch_memory_init.name = 'batch_memory_init'

        batch_weight_inits = [
            (head.softmax(r) * ones.dimshuffle(0, 'x'),
             head.softmax(w) * ones.dimshuffle(0, 'x'))
            for r, w in weight_init_params]

        def step(X, M_prev, *heads_prev):
            X.name = 'x[t]'
            # weights: batch_size x mem_size
            # M_prev:  batch_size x mem_size x mem_width
            weights_prev = zip(heads_prev[0 * head_count:1 * head_count],
                               heads_prev[1 * head_count:2 * head_count])
            for r, w in weights_prev:
                r.name = 'read_prev'
                w.name = 'write_prev'
            # Content read from memory under each read weighting.
            reads_prev = [T.sum(r.dimshuffle(0, 1, 'x') * M_prev, axis=1)
                          for r, _ in weights_prev]
            heads, output = controller([X] + reads_prev)
            M_curr, weights_curr = ntm_step(M_prev, heads, weights_prev)
            return [M_curr] + \
                [r for r, _ in weights_curr] + \
                [w for _, w in weights_curr] + \
                [output]

        scan_outs, _ = theano.scan(
            step,
            sequences=[X.dimshuffle(1, 0, 2)],
            outputs_info=[batch_memory_init] +
                         [r for r, _ in batch_weight_inits] +
                         [w for _, w in batch_weight_inits] +
                         [None])
        outputs = scan_outs[-1]
        return outputs.dimshuffle(1, 0, 2)

    return process
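# Usage sketch (hypothetical): running the NTM over a batch of sequences.
# The sizes follow common copy-task settings and are assumptions, as are the
# sibling modules (head, ntm, feedforward) this file relies on.
import theano
import theano.tensor as T
from theano_toolkit.parameters import Parameters

def _demo_ntm():
    P = Parameters()
    process = build(P,
                    input_size=8, output_size=8,
                    mem_size=128, mem_width=20,
                    controller_size=100)
    X = T.tensor3('X')   # batch x time x input_size
    Y = process(X)       # batch x time x output_size
    return theano.function([X], Y)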
# Variant of the sequence model above: the inferers are built with
# initialise_outputs=True, the Gaussians are parameterised by standard
# deviation rather than log-variance, and state resets fire with p=0.025.
def build(P, name,
          input_size=200, z_size=200,
          hidden_layer_size=2500,
          x_extractor_layers=[600] * 4,
          z_extractor_layers=[500] * 4,
          prior_layers=[500] * 4,
          generation_layers=[600] * 4,
          inference_layers=[500] * 4):

    def weight_init(x, y):
        return np.random.uniform(-0.08, 0.08, (x, y))

    X_extractor = feedforward.build_classifier(
        P, "x_extractor",
        input_sizes=[input_size],
        hidden_sizes=x_extractor_layers[:-1],
        output_size=x_extractor_layers[-1],
        initial_weights=weight_init,
        output_initial_weights=weight_init,
        activation=T.nnet.relu,
        output_activation=T.nnet.relu)

    Z_extractor = feedforward.build_classifier(
        P, "z_extractor",
        input_sizes=[z_size],
        hidden_sizes=z_extractor_layers[:-1],
        output_size=z_extractor_layers[-1],
        initial_weights=weight_init,
        output_initial_weights=weight_init,
        activation=T.nnet.relu,
        output_activation=T.nnet.relu)

    prior = vae.build_inferer(
        P, "prior",
        input_sizes=[hidden_layer_size],
        hidden_sizes=prior_layers,
        output_size=z_size,
        initial_weights=weight_init,
        activation=T.nnet.relu,
        initialise_outputs=True)

    generate = vae.build_inferer(
        P, "generator",
        input_sizes=[hidden_layer_size, z_extractor_layers[-1]],
        hidden_sizes=generation_layers,
        output_size=input_size,
        initial_weights=weight_init,
        activation=T.nnet.relu,
        initialise_outputs=True)

    P.init_recurrence_hidden = np.zeros((hidden_layer_size,))
    P.init_recurrence_cell = np.zeros((hidden_layer_size,))

    recurrence = lstm.build_step(
        P, "recurrence",
        input_sizes=[x_extractor_layers[-1], z_extractor_layers[-1]],
        hidden_size=hidden_layer_size)

    # NOTE: as in the version above, inference_layers is unused; the
    # inference network reuses generation_layers for its hidden sizes.
    infer = vae.build_inferer(
        P, "infer",
        input_sizes=[hidden_layer_size, x_extractor_layers[-1]],
        hidden_sizes=generation_layers,
        output_size=z_size,
        initial_weights=weight_init,
        activation=T.nnet.relu,
        initialise_outputs=True)

    def sample():
        init_hidden = T.tanh(P.init_recurrence_hidden)
        init_cell = P.init_recurrence_cell
        init_hidden_batch = T.alloc(init_hidden, 1, hidden_layer_size)
        init_cell_batch = T.alloc(init_cell, 1, hidden_layer_size)
        noise = U.theano_rng.normal(size=(40, 1, z_size))

        def _step(eps, prev_cell, prev_hidden):
            _, z_prior_mean, z_prior_std = prior([prev_hidden])
            z_sample = z_prior_mean + eps * z_prior_std
            z_feat = Z_extractor([z_sample])
            _, x_mean, _ = generate([prev_hidden, z_feat])
            x_feat = X_extractor([x_mean])
            curr_cell, curr_hidden = recurrence(x_feat, z_feat,
                                                prev_cell, prev_hidden)
            return curr_cell, curr_hidden, x_mean

        [cells, hiddens, x_means], _ = theano.scan(
            _step,
            sequences=[noise],
            outputs_info=[init_cell_batch, init_hidden_batch, None])
        return x_means

    def extract(X, l):
        init_hidden = T.tanh(P.init_recurrence_hidden)
        init_cell = P.init_recurrence_cell
        init_hidden_batch = T.alloc(init_hidden, X.shape[1],
                                    hidden_layer_size)
        init_cell_batch = T.alloc(init_cell, X.shape[1], hidden_layer_size)
        noise = U.theano_rng.normal(size=(X.shape[0], X.shape[1], z_size))
        # Randomly reset the recurrent state with probability 0.025 per step.
        reset_init_mask = U.theano_rng.binomial(
            size=(X.shape[0], X.shape[1]), p=0.025)
        X_feat = X_extractor([X])

        def _step(t, x_feat, eps, reset_mask, prev_cell, prev_hidden):
            reset_mask = reset_mask.dimshuffle(0, 'x')
            _, z_prior_mean, z_prior_std = prior([prev_hidden])
            _, z_mean, z_std = infer([prev_hidden, x_feat])
            z_sample = z_mean + eps * z_std
            z_feat = Z_extractor([z_sample])
            _, x_mean, x_std = generate([prev_hidden, z_feat])
            curr_cell, curr_hidden = recurrence(x_feat, z_feat,
                                                prev_cell, prev_hidden)
            curr_cell = T.switch(reset_mask, init_cell_batch, curr_cell)
            curr_hidden = T.switch(reset_mask, init_hidden_batch, curr_hidden)
            # Zero out all outputs past each sequence's length l.
            mask = (t < l).dimshuffle(0, 'x')
            return tuple(
                T.switch(mask, out, 0)
                for out in (curr_cell, curr_hidden,
                            z_prior_mean, z_prior_std,
                            z_sample, z_mean, z_std,
                            x_mean, x_std))

        [_, _,
         Z_prior_mean, Z_prior_std,
         Z_sample, Z_mean, Z_std,
         X_mean, X_std], _ = theano.scan(
            _step,
            sequences=[T.arange(X_feat.shape[0]),
                       X_feat, noise, reset_init_mask],
            outputs_info=[init_cell_batch, init_hidden_batch] + [None] * 7)
        return [Z_prior_mean, Z_prior_std,
                Z_mean, Z_std,
                X_mean, X_std]

    return extract, sample
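# Loss sketch (hypothetical, not part of the original module): a variational
# objective over the tensors returned by extract() in this std-parameterised
# variant, using the standard closed-form KL between diagonal Gaussians.
# Padded steps are zeroed by extract(), so this assumes masking is handled
# upstream before averaging.
import numpy as np
import theano.tensor as T

def vrnn_loss(X, Z_prior_mean, Z_prior_std, Z_mean, Z_std, X_mean, X_std):
    # KL( N(Z_mean, Z_std^2) || N(Z_prior_mean, Z_prior_std^2) ), elementwise.
    kl = (T.log(Z_prior_std) - T.log(Z_std)
          + (T.sqr(Z_std) + T.sqr(Z_mean - Z_prior_mean))
          / (2 * T.sqr(Z_prior_std)) - 0.5)
    # Gaussian negative log-likelihood of the observations.
    nll = (0.5 * T.log(2 * np.pi * T.sqr(X_std))
           + T.sqr(X - X_mean) / (2 * T.sqr(X_std)))
    return T.mean(T.sum(kl, axis=-1) + T.sum(nll, axis=-1))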