def create_decoder(z_hat_in, z_noise, num_units, norm_list, layer_num):
    i = layer_num
    dense = DenseLayer(z_hat_in, num_units=num_units, name='dec_dense%i' % i,
                       W=init, nonlinearity=identity)
    normalize = NormalizeLayer(dense, name='dec_normalize%i' % i)
    u = ScaleAndShiftLayer(normalize, name='dec_scale%i' % i)
    z_hat = DenoiseLayer(u_net=u, z_net=get_unlab(z_noise),
                         name='dec_denoise%i' % i)
    mean = ListIndexLayer(norm_list, index=1, name='dec_index_mean%i' % i)
    var = ListIndexLayer(norm_list, index=2, name='dec_index_var%i' % i)
    z_hat_bn = DecoderNormalizeLayer(z_hat, mean=mean, var=var,
                                     name='dec_decnormalize%i' % i)
    return z_hat, z_hat_bn
def create_encoder(incoming, num_units, nonlinearity, layer_num):
    i = layer_num
    z_pre = DenseLayer(incoming=incoming, num_units=num_units,
                       nonlinearity=identity, b=None,
                       name='enc_dense%i' % i, W=init)
    norm_list = NormalizeLayer(z_pre, return_stats=True,
                               name='enc_normalize%i' % i,
                               stat_indices=unlabeled_slice)
    z = ListIndexLayer(norm_list, index=0, name='enc_index%i' % i)
    z_noise = GaussianNoiseLayer(z, sigma=noise, name='enc_noise%i' % i)
    h = NonlinearityLayer(ScaleAndShiftLayer(z_noise, name='enc_scale%i' % i),
                          nonlinearity=nonlinearity, name='enc_nonlin%i' % i)
    return h, z, z_noise, norm_list
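
# --- Hedged usage sketch (not from the original source) ---
# A minimal sketch of how create_encoder might be chained into the encoder
# stack. The layer widths (1000-500-250-250-250) are inferred from the
# create_decoder calls further below; `l_in_noise` (the noisy input layer) and
# `nonlin` (the hidden nonlinearity) are assumed names, not taken from the
# original code, so the sketch is left commented out.
#
#     h1, z1, z_noise1, norm_list1 = create_encoder(l_in_noise, 1000, nonlin, 1)
#     h2, z2, z_noise2, norm_list2 = create_encoder(h1, 500, nonlin, 2)
#     h3, z3, z_noise3, norm_list3 = create_encoder(h2, 250, nonlin, 3)
#     h4, z4, z_noise4, norm_list4 = create_encoder(h3, 250, nonlin, 4)
#     h5, z5, z_noise5, norm_list5 = create_encoder(h4, 250, nonlin, 5)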
def __init__(self, settings):
    # Call constructor of base model
    super(SRNN_timit, self).__init__()

    # Define initializers for the parameters
    if settings.init_rnn == 'uniform':
        init_rnn = lasagne.init.Uniform(range=settings.init_range)
    elif settings.init_rnn == 'orthogonal':
        init_rnn = lasagne.init.Orthogonal()
    else:
        raise ValueError('Invalid initializer \'' + settings.init_rnn + '\'')

    if settings.init_mlp == 'uniform':
        init_mlp = lasagne.init.GlorotUniform()
    elif settings.init_mlp == 'normal':
        init_mlp = lasagne.init.GlorotNormal()
    else:
        raise ValueError('Invalid initializer \'' + settings.init_mlp + '\'')

    # For stability
    init_last_layer_mlp = lasagne.init.Uniform(range=settings.init_range)

    def dense_layer(l, num_units, nonlinearity, name, W=init_mlp,
                    b=lasagne.init.Constant(0.)):
        l = lasagne.layers.DenseLayer(l, num_units=num_units,
                                      name=name + "-dense", W=W, b=b,
                                      nonlinearity=nonlinearity)
        return l

    # Define MLP to be used in the encoding and decoding networks
    def mlp(input_layer, num_units, nonlinearity, name, num_mlp_layers=1,
            W=init_mlp, b=lasagne.init.Constant(0.)):
        output_layer = input_layer
        for i in range(num_mlp_layers):
            output_layer = dense_layer(output_layer, num_units=num_units,
                                       name=name + '_' + str(i + 1),
                                       nonlinearity=nonlinearity, W=W, b=b)
        return output_layer

    # Define nonlinearities for encoder and decoder
    def clipped_very_leaky_rectify(x):
        return T.clip(theano.tensor.nnet.relu(x, 1. / 3),
                      -settings.range_nonlin, settings.range_nonlin)

    def get_nonlinearity(nonlin):
        if nonlin == 'rectify':
            return lasagne.nonlinearities.rectify
        elif nonlin == 'very_leaky_rectify':
            return lasagne.nonlinearities.very_leaky_rectify
        elif nonlin == 'tanh':
            return lasagne.nonlinearities.tanh
        elif nonlin == 'clipped_very_leaky_rectify':
            return clipped_very_leaky_rectify
        else:
            raise ValueError('Invalid non-linearity \'' + nonlin + '\'')

    nonlin_encoder = get_nonlinearity(settings.nonlinearity_encoder)
    nonlin_decoder = get_nonlinearity(settings.nonlinearity_decoder)

    ## INPUTS
    self.u_sym = T.tensor3()
    self.u_sym.tag.test_value = np.random.randn(
        settings.batch_size, settings.sequence_length,
        settings.output_dim).astype('float32')
    self.x_sym = T.tensor3()
    self.x_sym.tag.test_value = np.random.randn(
        settings.batch_size, settings.sequence_length,
        settings.output_dim).astype('float32')

    # To handle sequences of different lengths
    self.sym_mask = T.matrix()
    self.sym_mask.tag.test_value = np.ones(
        (settings.batch_size, settings.sequence_length)).astype('float32')

    # Input layer for the inputs of the GRU network
    self.input_layer_u = lasagne.layers.InputLayer(
        (settings.batch_size, None, settings.output_dim), self.u_sym,
        name="input_layer_u")
    self.input_layer_mask = lasagne.layers.InputLayer(
        (settings.batch_size, None), self.sym_mask, name="input_layer_mask")
    input_layer_u_flat = lasagne.layers.ReshapeLayer(
        self.input_layer_u, (-1, settings.output_dim),
        name="input_layer_u_flat")
    u_dense1_flat = lasagne.layers.DenseLayer(
        input_layer_u_flat, num_units=settings.num_hidden_mlp,
        nonlinearity=nonlin_decoder, name="u_dense1_flat")
    u_dense2_flat = lasagne.layers.DenseLayer(
        u_dense1_flat, num_units=settings.num_hidden_mlp,
        nonlinearity=nonlin_decoder, name="u_dense2_flat")
    u_emb_dropout_flat = lasagne.layers.DropoutLayer(
        u_dense2_flat, p=settings.p_emb_u_drop, name="u_mlp_dropout_flat")
    u_emb_dropout = lasagne.layers.ReshapeLayer(
        u_emb_dropout_flat, (settings.batch_size, -1, settings.num_hidden_mlp))

    ## MODEL SETUP
    # We first initialize the shared variables for the initial deterministic
    # hidden state (initialized to 0). Due to the way we have divided the data
    # into batches, we can use the last hidden state of the current batch to
    # initialize the hidden state of the following batch.
    self.d_init_sh = theano.shared(
        np.zeros((settings.batch_size, settings.latent_size_d),
                 dtype=theano.config.floatX))
    self.input_layer_d_tm1 = lasagne.layers.InputLayer(
        (None, settings.latent_size_d), name="input_layer_d_tm1")

    # We first compute the output from the RNN (deterministic hidden state)
    # First GRU layer
    # Inputs: a (batch_size x sequence_length x hidden_layer_dim) matrix coming
    #         from the dropout layer and a (batch_size x hidden_layer_dim)
    #         initial hidden state
    # Output: a (batch_size x sequence_length x latent_size_d) matrix
    self.d_layer = lasagne.layers.GRULayer(
        u_emb_dropout, num_units=settings.latent_size_d,
        resetgate=lasagne.layers.Gate(W_in=init_rnn, W_hid=init_rnn,
                                      W_cell=None),
        updategate=lasagne.layers.Gate(W_in=init_rnn, W_hid=init_rnn,
                                       W_cell=None),
        hidden_update=lasagne.layers.Gate(
            W_in=init_rnn, W_hid=init_rnn, W_cell=None,
            nonlinearity=lasagne.nonlinearities.tanh),
        learn_init=False, hid_init=self.input_layer_d_tm1,
        mask_input=self.input_layer_mask, unroll_scan=settings.unroll_scan,
        name="d_layer")

    # We add dropout to all non-recurrent connections
    self.d_dropout_layer = lasagne.layers.DropoutLayer(
        self.d_layer, p=settings.p_d_drop, name="d_dropout_layer")

    # Define x inputs to the encoder
    self.input_layer_x = lasagne.layers.InputLayer(
        (settings.batch_size, None, settings.output_dim), self.x_sym,
        name="input_layer_x")
    input_layer_x_flat = lasagne.layers.ReshapeLayer(
        self.input_layer_x, (-1, settings.output_dim),
        name="input_layer_x_flat")

    # We share the parameters (and nonlinearities) with the MLP applied to u;
    # these layers act as feature extractors
    x_dense1_flat = lasagne.layers.DenseLayer(
        input_layer_x_flat, num_units=settings.num_hidden_mlp,
        nonlinearity=nonlin_decoder, name="x_dense1_flat",
        W=u_dense1_flat.W, b=u_dense1_flat.b)
    x_dense2_flat = lasagne.layers.DenseLayer(
        x_dense1_flat, num_units=settings.num_hidden_mlp,
        nonlinearity=nonlin_decoder, name="x_dense2_flat",
        W=u_dense2_flat.W, b=u_dense2_flat.b)
    x_emb_dropout_flat = lasagne.layers.DropoutLayer(
        x_dense2_flat, p=settings.p_emb_x_drop, name="x_mlp_dropout_flat")
    x_emb_dropout = lasagne.layers.ReshapeLayer(
        x_emb_dropout_flat, (settings.batch_size, -1, settings.num_hidden_mlp),
        name='x_emb_dropout')

    input_a_layer = lasagne.layers.ConcatLayer(
        [self.d_dropout_layer, x_emb_dropout], axis=2, name="input_a_layer")

    if settings.smoothing:
        print "Doing smoothing"
        # The hidden state is initialized with zeros
        a_layer = lasagne.layers.GRULayer(
            input_a_layer, num_units=settings.latent_size_a,
            resetgate=lasagne.layers.Gate(W_in=init_rnn, W_hid=init_rnn,
                                          W_cell=None),
            updategate=lasagne.layers.Gate(W_in=init_rnn, W_hid=init_rnn,
                                           W_cell=None),
            hidden_update=lasagne.layers.Gate(
                W_in=init_rnn, W_hid=init_rnn, W_cell=None,
                nonlinearity=lasagne.nonlinearities.tanh),
            learn_init=False, backwards=True,
            unroll_scan=settings.unroll_scan,
            mask_input=self.input_layer_mask, name="a_layer")
    else:
        # We only do filtering
        print "Doing filtering"
        input_a_layer_flat = lasagne.layers.ReshapeLayer(
            input_a_layer, (-1, [2]), name="input_a_layer_flat")
        a_layer_flat = mlp(input_a_layer_flat, settings.latent_size_a,
                           nonlin_encoder, "a_layer_flat",
                           num_mlp_layers=settings.num_layers_mlp)
        a_layer = lasagne.layers.ReshapeLayer(
            a_layer_flat, (settings.batch_size, -1, settings.latent_size_a))

    # Define shared variables for quantities to be updated across batches
    # (truncated BPTT)
    self.z_init_sh = theano.shared(
        np.zeros((settings.batch_size, settings.latent_size_z),
                 dtype=theano.config.floatX))
    self.input_layer_z_tm1 = lasagne.layers.InputLayer(
        (None, settings.latent_size_z), name="input_layer_z_tm1")
    self.mean_prior_init_sh = theano.shared(
        np.zeros((settings.batch_size, settings.latent_size_z),
                 dtype=theano.config.floatX))
    self.input_layer_mean_prior_tm1 = lasagne.layers.InputLayer(
        (None, settings.latent_size_z), name="input_layer_mean_prior_tm1")
    self.log_var_prior_init_sh = theano.shared(
        np.zeros((settings.batch_size, settings.latent_size_z),
                 dtype=theano.config.floatX))
    self.input_layer_log_var_prior_tm1 = lasagne.layers.InputLayer(
        (None, settings.latent_size_z), name="input_layer_log_var_prior_tm1")

    # Define MLPs to be used in StochsticRecurrentLayer
    mlp_prior_input_dim = settings.latent_size_d + settings.latent_size_z
    input_prior_mlp = lasagne.layers.InputLayer((None, mlp_prior_input_dim))
    mean_prior_dense1 = lasagne.layers.DenseLayer(
        input_prior_mlp, num_units=settings.num_hidden_mlp,
        nonlinearity=nonlin_decoder, name="mean_prior_dense1")
    mean_prior_dense2 = lasagne.layers.DenseLayer(
        mean_prior_dense1, num_units=settings.latent_size_z,
        W=init_last_layer_mlp, nonlinearity=None, name="mean_prior_dense2")
    log_var_prior_dense1 = lasagne.layers.DenseLayer(
        input_prior_mlp, num_units=settings.num_hidden_mlp,
        nonlinearity=nonlin_decoder, name="log_var_prior_dense1")
    log_var_prior_dense2 = lasagne.layers.DenseLayer(
        log_var_prior_dense1, num_units=settings.latent_size_z,
        W=init_last_layer_mlp, nonlinearity=None, name="log_var_prior_dense2")

    mlp_q_input_dim = settings.latent_size_a + settings.latent_size_z  # [input_q_n, z_previous]
    input_q_mlp = lasagne.layers.InputLayer((None, mlp_q_input_dim))
    mean_q_dense1 = lasagne.layers.DenseLayer(
        input_q_mlp, num_units=settings.num_hidden_mlp,
        nonlinearity=nonlin_encoder, name="mean_q_dense1")
    mean_q_dense2 = lasagne.layers.DenseLayer(
        mean_q_dense1, num_units=settings.latent_size_z,
        W=init_last_layer_mlp, nonlinearity=None, name="mean_q_dense2")
    log_var_q_dense1 = lasagne.layers.DenseLayer(
        input_q_mlp, num_units=settings.num_hidden_mlp,
        nonlinearity=nonlin_encoder, name="log_var_q_dense1")
    log_var_q_dense2 = lasagne.layers.DenseLayer(
        log_var_q_dense1, num_units=settings.latent_size_z,
        W=init_last_layer_mlp, nonlinearity=None, name="log_var_q_dense2")

    # settings.cons < 0 is interpreted as an exponent,
    # e.g. settings.cons = -8 gives cons = 1e-8
    if settings.cons == 0:
        cons = 0
    elif settings.cons < 0:
        cons = 10**(settings.cons)
    else:
        raise ValueError()

    stochastic_recurrent_layer = StochsticRecurrentLayer(
        input_p=self.d_dropout_layer, input_q=a_layer,
        mu_p_mlp=mean_prior_dense2, logvar_p_mlp=log_var_prior_dense2,
        q_mu_mlp=mean_q_dense2, q_logvar_mlp=log_var_q_dense2,
        num_units=settings.latent_size_z, unroll_scan=settings.unroll_scan,
        use_mu_residual_q=settings.use_mu_residual,
        z_init=self.input_layer_z_tm1,
        mu_p_init=self.input_layer_mean_prior_tm1,
        mask_input=self.input_layer_mask, cons=cons,
        name='stochastic_recurrent_layer')

    # ListIndexLayer is needed after a Layer that returns multiple outputs
    self.z_layer = ListIndexLayer(stochastic_recurrent_layer, index=0,
                                  name='z_layer')
    self.mean_prior_layer = ListIndexLayer(stochastic_recurrent_layer, index=1,
                                           name='mean_prior_layer')
    self.log_var_prior_layer = ListIndexLayer(stochastic_recurrent_layer,
                                              index=2,
                                              name='log_var_prior_layer')
    self.mean_q_layer = ListIndexLayer(stochastic_recurrent_layer, index=3,
                                       name='mean_q_layer')
    self.log_var_q_layer = ListIndexLayer(stochastic_recurrent_layer, index=4,
                                          name='log_var_q_layer')

    # Finish the generative model
    self.z_dropout_layer = lasagne.layers.DropoutLayer(
        self.z_layer, p=settings.p_z_drop, name="z_dropout_layer")

    # The output MLP needs 2d tensors, hence we reshape here, add the MLP and
    # reshape again
    d_layer_reshaped = lasagne.layers.ReshapeLayer(
        self.d_dropout_layer, (-1, settings.latent_size_d),
        name="d_layer_reshaped")
    z_layer_reshaped = lasagne.layers.ReshapeLayer(
        self.z_dropout_layer, (-1, settings.latent_size_z),
        name="z_layer_reshaped")
    input_generative_mlp = lasagne.layers.ConcatLayer(
        [d_layer_reshaped, z_layer_reshaped], axis=1,
        name="input_generative_mlp")
    generative_mlp = mlp(input_generative_mlp, settings.num_hidden_mlp,
                         nonlin_decoder, "generative_mlp",
                         num_mlp_layers=settings.num_layers_mlp)

    # Compute the Gaussian output parameters (mean and log-variance) and reshape
    mean_gauss_output_reshaped = lasagne.layers.DenseLayer(
        generative_mlp, num_units=settings.output_dim,
        nonlinearity=lasagne.nonlinearities.identity, W=init_last_layer_mlp,
        name="mean_gauss_output_reshaped")
    log_var_output_reshaped = lasagne.layers.DenseLayer(
        generative_mlp, num_units=settings.output_dim,
        nonlinearity=lasagne.nonlinearities.identity, W=init_last_layer_mlp,
        name="log_var_output_reshaped")
    self.mean_gauss_output_layer = lasagne.layers.ReshapeLayer(
        mean_gauss_output_reshaped,
        (settings.batch_size, -1, settings.output_dim),
        name="mean_gauss_output_layer")
    self.log_var_gauss_output_layer = lasagne.layers.ReshapeLayer(
        log_var_output_reshaped,
        (settings.batch_size, -1, settings.output_dim),
        name="log_var_output_layer")

    # List of all layers that we need to pass for pickle/model_info (see base model)
    self.output_layer = [
        self.z_layer, self.mean_prior_layer, self.log_var_prior_layer,
        self.mean_gauss_output_layer, self.log_var_gauss_output_layer
    ]

    # Get a list of all parameters in the network.
    self.model_params = lasagne.layers.get_all_params(self.output_layer)
    # Get a list of all trainable parameters in the network.
    self.model_params_trainable = lasagne.layers.get_all_params(
        self.output_layer, trainable=True)
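
# --- Hedged sketch (not from the original source) ---
# The comments in __init__ describe re-using the last hidden/latent states of
# one batch to initialize the next one (truncated BPTT). One way this could be
# wired up is to take the last time step of the corresponding outputs and write
# them back into the shared variables; `d_out`, `z_out`, `mean_prior_out` and
# `log_var_prior_out` are hypothetical names for the results of
# lasagne.layers.get_output on the matching layers.
#
#     state_updates = [
#         (model.d_init_sh, d_out[:, -1, :]),
#         (model.z_init_sh, z_out[:, -1, :]),
#         (model.mean_prior_init_sh, mean_prior_out[:, -1, :]),
#         (model.log_var_prior_init_sh, log_var_prior_out[:, -1, :]),
#     ]
#     # ...passed as `updates=state_updates` to theano.function.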
}).shape
h6_dec = get_unlab(l_out_enc)
print "y_weights_decoder:", lasagne.layers.get_output(h6_dec, sym_x).eval({
    sym_x: x_train[:200]
}).shape

###############
#   DECODER   #
###############

##### Decoder Layer 6
u6 = ScaleAndShiftLayer(NormalizeLayer(h6_dec, name='dec_normalize6'),
                        name='dec_scale6')
z_hat6 = DenoiseLayer(u_net=u6, z_net=get_unlab(z_noise6), name='dec_denoise6')
mean6 = ListIndexLayer(norm_list6, index=1, name='dec_index_mean6')
var6 = ListIndexLayer(norm_list6, index=2, name='dec_index_var6')
z_hat_bn6 = DecoderNormalizeLayer(z_hat6, mean=mean6, var=var6,
                                  name='dec_decnormalize6')

###########################
z_hat5, z_hat_bn5 = create_decoder(z_hat6, z_noise5, 250, norm_list5, 5)
z_hat4, z_hat_bn4 = create_decoder(z_hat5, z_noise4, 250, norm_list4, 4)
z_hat3, z_hat_bn3 = create_decoder(z_hat4, z_noise3, 250, norm_list3, 3)
z_hat2, z_hat_bn2 = create_decoder(z_hat3, z_noise2, 500, norm_list2, 2)
z_hat1, z_hat_bn1 = create_decoder(z_hat2, z_noise1, 1000, norm_list1, 1)

############################# Decoder Layer 0
# We need this separately because we also have h0, i.e. the input layer
# Recognition model q(z|x)
l_in = lasagne.layers.InputLayer((None, num_features))
l_enc_h1 = denselayer(l_in, num_units=nhidden, name='ENC_DENSE1',
                      nonlinearity=nonlin_enc)
l_enc_h1 = denselayer(l_enc_h1, num_units=nhidden, name='ENC_DENSE2',
                      nonlinearity=nonlin_enc)
l_mu = lasagne.layers.DenseLayer(l_enc_h1, num_units=latent_size,
                                 nonlinearity=lasagne.nonlinearities.identity,
                                 name='ENC_MU')
l_log_var = lasagne.layers.DenseLayer(l_enc_h1, num_units=latent_size,
                                      nonlinearity=lasagne.nonlinearities.identity,
                                      name='ENC_LOG_VAR')

# Sample layer
l_z = SampleLayer(mean=l_mu, log_var=l_log_var,
                  eq_samples=sym_eq_samples, iw_samples=sym_iw_samples)

# Normalizing flow
l_logdet_J = []
l_zk = l_z
for i in range(nflows):
    l_nf = NormalizingPlanarFlowLayer(l_zk)
    l_zk = ListIndexLayer(l_nf, index=0)
    l_logdet_J += [ListIndexLayer(l_nf, index=1)]  # needed for the cost function

# Generative model p(x|z)
l_dec_h1 = denselayer(l_zk, num_units=nhidden, name='DEC_DENSE2',
                      nonlinearity=nonlin_dec)
l_dec_h1 = denselayer(l_dec_h1, num_units=nhidden, name='DEC_DENSE1',
                      nonlinearity=nonlin_dec)
l_dec_x_mu = lasagne.layers.DenseLayer(l_dec_h1, num_units=num_features,
                                       nonlinearity=lasagne.nonlinearities.sigmoid,
                                       name='X_MU')

# Get the outputs needed for evaluation during training, i.e. with noise if any
train_out = lasagne.layers.get_output(
    [l_z, l_zk, l_mu, l_log_var, l_dec_x_mu] + l_logdet_J, sym_x,
    deterministic=False)
z_train = train_out[0]
zk_train = train_out[1]
z_mu_train = train_out[2]
z_log_var_train = train_out[3]
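
# --- Hedged sketch (not from the original source) ---
# How the log|det J| terms collected above could enter the flow-corrected lower
# bound (Rezende & Mohamed, 2015): with q_K(z_K) the density after K planar
# flows,
#     log q_K(z_K) = log q_0(z_0) - sum_k log|det dz_k/dz_{k-1}|,
# so the per-flow log-determinants are summed and added to the usual ELBO.
# Index 4 of train_out is the output of l_dec_x_mu and the remaining entries
# are the flow terms; the variable names below are assumptions.
x_mu_train = train_out[4]
logdet_J_train = train_out[5:]           # one tensor per flow step
sum_logdet_J_train = sum(logdet_J_train)  # symbolic sum over the K flows
# lower_bound = log_px_given_zk + log_pz_k
#               - (log_qz0_given_x - sum_logdet_J_train)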