def create_std_model(self, X, Y, n_dim, n_out, n_chan=1):
    # params
    n_lat = self.n_lat  # latent stochastic variables
    n_hid = 500  # size of hidden layer in encoder/decoder
    n_out = n_dim * n_dim * n_chan  # total dimensionality of output
    hid_nl = (lasagne.nonlinearities.tanh if self.model == 'bernoulli'
              else T.nnet.softplus)
    # hid_nl = lasagne.nonlinearities.rectify

    # create the encoder network
    l_q_in = lasagne.layers.InputLayer(shape=(None, n_chan, n_dim, n_dim),
                                       input_var=X)
    l_q_hid = lasagne.layers.DenseLayer(
        l_q_in, num_units=n_hid,
        nonlinearity=hid_nl, name='q_hid')
    l_q_mu = lasagne.layers.DenseLayer(
        l_q_hid, num_units=n_lat,
        nonlinearity=None, name='q_mu')
    l_q_logsigma = lasagne.layers.DenseLayer(
        l_q_hid, num_units=n_lat,
        nonlinearity=None, name='q_logsigma')

    # create the decoder network
    l_p_z = GaussianSampleLayer(l_q_mu, l_q_logsigma)
    l_p_hid = lasagne.layers.DenseLayer(
        l_p_z, num_units=n_hid,
        nonlinearity=hid_nl,
        W=lasagne.init.GlorotUniform(), name='p_hid')
    l_p_mu, l_p_logsigma = None, None

    if self.model == 'bernoulli':
        l_sample = lasagne.layers.DenseLayer(
            l_p_hid, num_units=n_out,
            nonlinearity=lasagne.nonlinearities.sigmoid,
            W=lasagne.init.GlorotUniform(),
            b=lasagne.init.Constant(0.), name='p_sigma')
    elif self.model == 'gaussian':
        l_p_mu = lasagne.layers.DenseLayer(
            l_p_hid, num_units=n_out,
            nonlinearity=None)
        # relu_shift is for numerical stability - if training data has any
        # dimensions where stdev=0, allowing logsigma to approach -inf
        # will cause the loss function to become NAN. So we set the limit
        # stdev >= exp(-1 * relu_shift)
        relu_shift = 10
        l_p_logsigma = lasagne.layers.DenseLayer(
            l_p_hid, num_units=n_out,
            nonlinearity=lambda a: T.nnet.relu(a + relu_shift) - relu_shift)
        l_sample = GaussianSampleLayer(l_p_mu, l_p_logsigma)

    return l_p_mu, l_p_logsigma, l_q_mu, l_q_logsigma, l_sample, l_p_z
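# GaussianSampleLayer is imported from elsewhere in this repo. For reference,
# here is a minimal sketch of the reparameterized sampler it is assumed to
# implement (z = mu + exp(logsigma) * eps with eps ~ N(0, I)); illustrative
# only, not the repo's exact implementation:
from theano.sandbox.rng_mrg import MRG_RandomStreams

class GaussianSampleLayerSketch(lasagne.layers.MergeLayer):
    def __init__(self, mu, logsigma, rng=None, **kwargs):
        self.rng = rng if rng else MRG_RandomStreams(seed=1234)
        super(GaussianSampleLayerSketch, self).__init__([mu, logsigma],
                                                        **kwargs)

    def get_output_shape_for(self, input_shapes):
        # samples have the same shape as the mean
        return input_shapes[0]

    def get_output_for(self, inputs, deterministic=False, **kwargs):
        mu, logsigma = inputs
        if deterministic:
            return mu  # return the mean at test time
        eps = self.rng.normal(mu.shape, dtype=theano.config.floatX)
        return mu + T.exp(logsigma) * eps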
def get_model(interp=False):
    dims, n_channels = tuple(cfg['dims']), cfg['n_channels']
    shape = (None, n_channels) + dims
    l_in = lasagne.layers.InputLayer(shape=shape)
    l_enc_conv1 = C2D(incoming=l_in, num_filters=128, filter_size=[5, 5],
                      stride=[2, 2], pad=(2, 2), W=initmethod(0.02),
                      nonlinearity=lrelu(0.2), name='enc_conv1')
    l_enc_conv2 = BN(C2D(incoming=l_enc_conv1, num_filters=256,
                         filter_size=[5, 5], stride=[2, 2], pad=(2, 2),
                         W=initmethod(0.02), nonlinearity=lrelu(0.2),
                         name='enc_conv2'), name='bnorm2')
    l_enc_conv3 = BN(C2D(incoming=l_enc_conv2, num_filters=512,
                         filter_size=[5, 5], stride=[2, 2], pad=(2, 2),
                         W=initmethod(0.02), nonlinearity=lrelu(0.2),
                         name='enc_conv3'), name='bnorm3')
    l_enc_conv4 = BN(C2D(incoming=l_enc_conv3, num_filters=1024,
                         filter_size=[5, 5], stride=[2, 2], pad=(2, 2),
                         W=initmethod(0.02), nonlinearity=lrelu(0.2),
                         name='enc_conv4'), name='bnorm4')
    # sanity check on the encoder's output shape
    print(lasagne.layers.get_output_shape(l_enc_conv4, (196, 3, 64, 64)))
    l_enc_fc1 = BN(DL(incoming=l_enc_conv4, num_units=1000,
                      W=initmethod(0.02), nonlinearity=relu, name='enc_fc1'),
                   name='bnorm_enc_fc1')

    # Define latent values
    l_enc_mu, l_enc_logsigma = [
        BN(DL(incoming=l_enc_fc1, num_units=cfg['num_latents'],
              nonlinearity=None, name='enc_mu'), name='mu_bnorm'),
        BN(DL(incoming=l_enc_fc1, num_units=cfg['num_latents'],
              nonlinearity=None, name='enc_logsigma'), name='ls_bnorm')
    ]
    l_Z_IAF = GaussianSampleLayer(l_enc_mu, l_enc_logsigma, name='l_Z_IAF')
    l_IAF_mu, l_IAF_logsigma = [
        MADE(l_Z_IAF, [cfg['num_latents']], 'l_IAF_mu'),
        MADE(l_Z_IAF, [cfg['num_latents']], 'l_IAF_ls')
    ]
    l_Z = IAFLayer(l_Z_IAF, l_IAF_mu, l_IAF_logsigma, name='l_Z')
    l_dec_fc2 = DL(incoming=l_Z, num_units=512 * 16,
                   nonlinearity=lrelu(0.2), W=initmethod(0.02),
                   name='l_dec_fc2')
    l_unflatten = lasagne.layers.ReshapeLayer(
        incoming=l_dec_fc2,
        shape=([0], 512, 4, 4),
    )
    l_dec_conv1 = DeconvLayer(incoming=l_unflatten, num_filters=512,
                              filter_size=[5, 5], stride=[2, 2], crop=(2, 2),
                              W=initmethod(0.02), nonlinearity=None,
                              name='dec_conv1')
    l_dec_conv2a = MDBLOCK(incoming=l_dec_conv1, num_filters=512,
                           scales=[0, 2], name='dec_conv2a',
                           nonlinearity=lrelu(0.2))
    l_dec_conv2 = DeconvLayer(incoming=l_dec_conv2a, num_filters=256,
                              filter_size=[5, 5], stride=[2, 2], crop=(2, 2),
                              W=initmethod(0.02), nonlinearity=None,
                              name='dec_conv2')
    l_dec_conv3a = MDBLOCK(incoming=l_dec_conv2, num_filters=256,
                           scales=[0, 2, 3], name='dec_conv3a',
                           nonlinearity=lrelu(0.2))
    l_dec_conv3 = DeconvLayer(incoming=l_dec_conv3a, num_filters=128,
                              filter_size=[5, 5], stride=[2, 2], crop=(2, 2),
                              W=initmethod(0.02), nonlinearity=None,
                              name='dec_conv3')
    l_dec_conv4a = MDBLOCK(incoming=l_dec_conv3, num_filters=128,
                           scales=[0, 2, 3], name='dec_conv4a',
                           nonlinearity=lrelu(0.2))
    l_dec_conv4 = BN(DeconvLayer(incoming=l_dec_conv4a, num_filters=128,
                                 filter_size=[5, 5], stride=[2, 2],
                                 crop=(2, 2), W=initmethod(0.02),
                                 nonlinearity=lrelu(0.2), name='dec_conv4'),
                     name='bnorm_dc4')
    # autoregressive RGB output: G is conditioned on R, and B on R and G
    R = NL(MDCL(l_dec_conv4, num_filters=2, scales=[2, 3, 4], name='R'),
           sigmoid)
    G = NL(
        ESL([
            MDCL(l_dec_conv4, num_filters=2, scales=[2, 3, 4], name='G_a'),
            MDCL(R, num_filters=2, scales=[2, 3, 4], name='G_b')
        ]), sigmoid)
    B = NL(
        ESL([
            MDCL(l_dec_conv4, num_filters=2, scales=[2, 3, 4], name='B_a'),
            MDCL(CL([R, G]), num_filters=2, scales=[2, 3, 4], name='B_b')
        ]), sigmoid)
    l_out = CL([
        beta_layer(SL(R, slice(0, 1), 1), SL(R, slice(1, 2), 1)),
        beta_layer(SL(G, slice(0, 1), 1), SL(G, slice(1, 2), 1)),
        beta_layer(SL(B, slice(0, 1), 1), SL(B, slice(1, 2), 1))
    ])
    minibatch_discrim = MinibatchLayer(
        lasagne.layers.GlobalPoolLayer(l_enc_conv4),
        num_kernels=500, name='minibatch_discrim')
    l_discrim = DL(incoming=minibatch_discrim, num_units=3,
                   nonlinearity=lasagne.nonlinearities.softmax, b=None,
                   W=initmethod(0.02), name='discrimi')

    return {
        'l_in': l_in,
        'l_out': l_out,
        'l_mu': l_enc_mu,
        'l_ls': l_enc_logsigma,
        'l_Z': l_Z,
        'l_IAF_mu': l_IAF_mu,
        'l_IAF_ls': l_IAF_logsigma,
        'l_Z_IAF': l_Z_IAF,
        'l_introspect': [l_enc_conv1, l_enc_conv2, l_enc_conv3, l_enc_conv4],
        'l_discrim': l_discrim
    }
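# Hedged usage sketch for the dict returned by get_model above (illustrative,
# not part of the repo; assumes cfg is populated and that beta_layer returns
# its mean under deterministic=True, which is defined elsewhere):
def make_reconstruction_fn():
    model = get_model()
    X = T.tensor4('X')
    X_hat = lasagne.layers.get_output(model['l_out'], X, deterministic=True)
    return theano.function([X], X_hat, allow_input_downcast=True)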
def create_model(self, X, Y, n_dim, n_out, n_chan=1):
    # params
    n_lat = 200  # latent stochastic variables
    n_aux = 10  # auxiliary variables
    n_hid = 500  # size of hidden layer in encoder/decoder
    n_hid_cv = 500  # size of hidden layer in control variate net
    n_out = n_dim * n_dim * n_chan  # total dimensionality of output
    hid_nl = lasagne.nonlinearities.tanh
    relu_shift = lambda av: T.nnet.relu(av + 10) - 10  # for numerical stability

    # create the encoder network
    # create q(a|x)
    l_qa_in = lasagne.layers.InputLayer(shape=(None, n_chan, n_dim, n_dim),
                                        input_var=X)
    l_qa_hid = lasagne.layers.DenseLayer(l_qa_in, num_units=n_hid,
                                         nonlinearity=hid_nl)
    l_qa_mu = lasagne.layers.DenseLayer(l_qa_in, num_units=n_aux,
                                        nonlinearity=None)
    l_qa_logsigma = lasagne.layers.DenseLayer(l_qa_in, num_units=n_aux,
                                              nonlinearity=relu_shift)
    l_qa = GaussianSampleLayer(l_qa_mu, l_qa_logsigma)

    # create q(z|a,x)
    l_qz_in = lasagne.layers.InputLayer((None, n_aux))
    l_qz_hid1a = lasagne.layers.DenseLayer(l_qz_in, num_units=n_hid,
                                           nonlinearity=hid_nl)
    l_qz_hid1b = lasagne.layers.DenseLayer(l_qa_in, num_units=n_hid,
                                           nonlinearity=hid_nl)
    l_qz_hid2 = lasagne.layers.ElemwiseSumLayer([l_qz_hid1a, l_qz_hid1b])
    # l_qz_hid2 = lasagne.layers.ConcatLayer([l_qz_hid1a, l_qz_hid1b])
    # l_qz_hid2 = lasagne.layers.NonlinearityLayer(l_qz_hid2, hid_nl)
    # test w/o a:
    l_qz_hid3 = lasagne.layers.DenseLayer(l_qz_hid2, num_units=n_hid,
                                          nonlinearity=hid_nl)
    l_qz_mu = lasagne.layers.DenseLayer(l_qz_hid3, num_units=n_lat,
                                        nonlinearity=T.nnet.sigmoid)
    l_qz = BernoulliSampleLayer(l_qz_mu)
    l_qz_logsigma = None

    # create the decoder network
    # create p(x|z)
    l_px_in = lasagne.layers.InputLayer((None, n_lat))
    l_px_hid = lasagne.layers.DenseLayer(l_px_in, num_units=n_hid,
                                         W=lasagne.init.GlorotUniform(),
                                         nonlinearity=hid_nl)
    l_px_mu, l_px_logsigma = None, None
    if self.model == 'bernoulli':
        l_px_mu = lasagne.layers.DenseLayer(
            l_px_hid, num_units=n_out,
            nonlinearity=lasagne.nonlinearities.sigmoid)
    elif self.model == 'gaussian':
        l_px_mu = lasagne.layers.DenseLayer(l_px_hid, num_units=n_out,
                                            nonlinearity=None)
        l_px_logsigma = lasagne.layers.DenseLayer(l_px_hid, num_units=n_out,
                                                  nonlinearity=relu_shift)

    # create p(a|z)
    l_pa_hid = lasagne.layers.DenseLayer(l_px_in, num_units=n_hid,
                                         nonlinearity=hid_nl)
    l_pa_mu = lasagne.layers.DenseLayer(l_pa_hid, num_units=n_aux,
                                        nonlinearity=None)
    l_pa_logsigma = lasagne.layers.DenseLayer(
        l_pa_hid, num_units=n_aux,
        W=lasagne.init.GlorotNormal(),
        b=lasagne.init.Normal(1e-3),
        nonlinearity=relu_shift)

    # create control variate (baseline) network
    l_cv_in = lasagne.layers.InputLayer(shape=(None, n_chan, n_dim, n_dim),
                                        input_var=X)
    l_cv_hid = lasagne.layers.DenseLayer(l_cv_in, num_units=n_hid_cv,
                                         nonlinearity=hid_nl)
    l_cv = lasagne.layers.DenseLayer(l_cv_hid, num_units=1,
                                     nonlinearity=None)

    # create variables for centering signal
    c = theano.shared(np.zeros((1, 1), dtype=np.float32),
                      broadcastable=(True, True))
    v = theano.shared(np.zeros((1, 1), dtype=np.float32),
                      broadcastable=(True, True))

    # store certain input layers for downstream use (quick hack)
    self.input_layers = (l_qa_in, l_qz_in, l_px_in)

    return l_px_mu, l_px_logsigma, l_pa_mu, l_pa_logsigma, \
        l_qa_mu, l_qa_logsigma, l_qz_mu, l_qz_logsigma, \
        l_qa, l_qz, l_cv, c, v
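# BernoulliSampleLayer is imported from elsewhere in this repo. A minimal
# sketch of what it is assumed to do: draw z ~ Bernoulli(mu). Note this
# sampling has no reparameterization gradient, which is why the control
# variate network l_cv above exists (REINFORCE-style estimator).
# Illustrative only:
from theano.sandbox.rng_mrg import MRG_RandomStreams

class BernoulliSampleLayerSketch(lasagne.layers.Layer):
    def __init__(self, mu, rng=None, **kwargs):
        self.rng = rng if rng else MRG_RandomStreams(seed=1234)
        super(BernoulliSampleLayerSketch, self).__init__(mu, **kwargs)

    def get_output_for(self, mu, **kwargs):
        # one Bernoulli draw per unit, with success probability mu
        return self.rng.binomial(size=mu.shape, p=mu,
                                 dtype=theano.config.floatX)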
def create_model(self, X, Y, n_dim, n_out, n_chan=1):
    # params
    n_lat = 64  # latent stochastic variables
    n_aux = 10  # auxiliary variables
    n_hid = 500  # size of hidden layer in encoder/decoder
    n_sam = self.n_sample  # number of monte-carlo samples
    n_hid_cv = 500  # size of hidden layer in control variate net
    n_out = n_dim * n_dim * n_chan  # total dimensionality of output
    hid_nl = lasagne.nonlinearities.tanh
    relu_shift = lambda av: T.nnet.relu(av + 10) - 10  # for numerical stability

    # self.rbm = RBM(n_dim=int(np.sqrt(n_lat)), n_out=10, n_chan=1,
    #                opt_params={'nb': 128})
    self.rbm = AuxiliaryVariationalRBM(
        n_dim=int(np.sqrt(n_lat)), n_out=10, n_chan=1,
        opt_params={'nb': 128 * n_sam})

    # create the encoder network
    # create q(a|x)
    l_qa_in = lasagne.layers.InputLayer(
        shape=(None, n_chan, n_dim, n_dim),
        input_var=X,
    )
    l_qa_hid = lasagne.layers.DenseLayer(
        l_qa_in, num_units=n_hid,
        nonlinearity=hid_nl,
    )
    l_qa_mu = lasagne.layers.DenseLayer(
        l_qa_in, num_units=n_aux,
        nonlinearity=None,
    )
    l_qa_logsigma = lasagne.layers.DenseLayer(
        l_qa_in, num_units=n_aux,
        nonlinearity=relu_shift,
    )

    # repeatedly sample
    l_qa_mu = lasagne.layers.ReshapeLayer(
        RepeatLayer(l_qa_mu, n_ax=1, n_rep=n_sam),
        shape=(-1, n_aux),
    )
    l_qa_logsigma = lasagne.layers.ReshapeLayer(
        RepeatLayer(l_qa_logsigma, n_ax=1, n_rep=n_sam),
        shape=(-1, n_aux),
    )
    l_qa = GaussianSampleLayer(l_qa_mu, l_qa_logsigma)

    # create q(z|a,x)
    l_qz_in = lasagne.layers.InputLayer((None, n_aux))
    l_qz_hid1a = lasagne.layers.DenseLayer(
        l_qz_in, num_units=n_hid,
        nonlinearity=hid_nl,
    )
    l_qz_hid1b = lasagne.layers.DenseLayer(
        l_qa_in, num_units=n_hid,
        nonlinearity=hid_nl,
    )
    l_qz_hid1b = lasagne.layers.ReshapeLayer(
        RepeatLayer(l_qz_hid1b, n_ax=1, n_rep=n_sam),
        shape=(-1, n_hid),
    )
    l_qz_hid2 = lasagne.layers.ElemwiseSumLayer([l_qz_hid1a, l_qz_hid1b])
    l_qz_hid3 = lasagne.layers.DenseLayer(
        l_qz_hid2, num_units=n_hid,
        nonlinearity=hid_nl,
    )
    l_qz_mu = lasagne.layers.DenseLayer(
        l_qz_hid3, num_units=n_lat,
        nonlinearity=T.nnet.sigmoid,
    )
    l_qz = BernoulliSampleLayer(l_qz_mu)
    l_qz_logsigma = None

    # create the decoder network
    # create p(x|z)
    l_px_in = lasagne.layers.InputLayer((None, n_lat))
    l_px_hid = lasagne.layers.DenseLayer(
        l_px_in, num_units=n_hid,
        W=lasagne.init.GlorotUniform(),
        nonlinearity=hid_nl,
    )
    l_px_mu, l_px_logsigma = None, None
    if self.model == 'bernoulli':
        l_px_mu = lasagne.layers.DenseLayer(
            l_px_hid, num_units=n_out,
            nonlinearity=lasagne.nonlinearities.sigmoid,
        )
    elif self.model == 'gaussian':
        l_px_mu = lasagne.layers.DenseLayer(
            l_px_hid, num_units=n_out,
            nonlinearity=None,
        )
        l_px_logsigma = lasagne.layers.DenseLayer(
            l_px_hid, num_units=n_out,
            nonlinearity=relu_shift,
        )

    # create p(a|z)
    l_pa_hid = lasagne.layers.DenseLayer(
        l_px_in, num_units=n_hid,
        nonlinearity=hid_nl,
    )
    l_pa_mu = lasagne.layers.DenseLayer(
        l_pa_hid, num_units=n_aux,
        nonlinearity=None,
    )
    l_pa_logsigma = lasagne.layers.DenseLayer(
        l_pa_hid, num_units=n_aux,
        W=lasagne.init.GlorotNormal(),
        b=lasagne.init.Normal(1e-3),
        nonlinearity=relu_shift,
    )

    # create control variate (baseline) network
    l_cv_in = lasagne.layers.InputLayer(
        shape=(None, n_chan, n_dim, n_dim),
        input_var=X,
    )
    l_cv_hid = lasagne.layers.DenseLayer(
        l_cv_in, num_units=n_hid_cv,
        nonlinearity=hid_nl,
    )
    l_cv = lasagne.layers.DenseLayer(
        l_cv_hid, num_units=1,
        nonlinearity=None,
    )

    # create variables for centering signal
    c = theano.shared(np.zeros((1, 1), dtype=np.float64),
                      broadcastable=(True, True))
    v = theano.shared(np.zeros((1, 1), dtype=np.float64),
                      broadcastable=(True, True))

    # store certain input layers for downstream use (quick hack)
    self.input_layers = (l_qa_in, l_qz_in, l_px_in, l_cv_in)
    self.n_lat = n_lat
    self.n_lat2 = int(np.sqrt(n_lat))
    self.n_hid = n_hid

    return l_px_mu, l_px_logsigma, l_pa_mu, l_pa_logsigma, \
        l_qa_mu, l_qa_logsigma, l_qz_mu, l_qz_logsigma, \
        l_qa, l_qz, l_cv, c, v
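# RepeatLayer is imported from elsewhere in this repo. Its assumed semantics,
# given the ReshapeLayer calls above: insert a new axis at position n_ax and
# tile the input n_rep times along it, so each data point yields n_sam Monte
# Carlo copies after the reshape. A minimal sketch (illustrative only):
class RepeatLayerSketch(lasagne.layers.Layer):
    def __init__(self, incoming, n_ax=1, n_rep=1, **kwargs):
        super(RepeatLayerSketch, self).__init__(incoming, **kwargs)
        self.n_ax, self.n_rep = n_ax, n_rep

    def get_output_shape_for(self, input_shape):
        shape = list(input_shape)
        shape.insert(self.n_ax, self.n_rep)
        return tuple(shape)

    def get_output_for(self, input, **kwargs):
        # pad a new axis at n_ax, then tile n_rep times along it
        out = T.shape_padaxis(input, axis=self.n_ax)
        reps = [1] * (input.ndim + 1)
        reps[self.n_ax] = self.n_rep
        return T.tile(out, reps)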
def get_model(interp=False):
    dims, n_channels, n_classes = (tuple(cfg['dims']), cfg['n_channels'],
                                   cfg['n_classes'])
    shape = (None, n_channels) + dims
    l_in = lasagne.layers.InputLayer(shape=shape)
    l_enc_conv1 = C2D(incoming=l_in, num_filters=128, filter_size=[5, 5],
                      stride=[2, 2], pad=(2, 2), W=initmethod(0.02),
                      nonlinearity=lrelu(0.2), name='enc_conv1')
    l_enc_conv2 = BN(C2D(incoming=l_enc_conv1, num_filters=256,
                         filter_size=[5, 5], stride=[2, 2], pad=(2, 2),
                         W=initmethod(0.02), nonlinearity=lrelu(0.2),
                         name='enc_conv2'), name='bnorm2')
    l_enc_conv3 = BN(C2D(incoming=l_enc_conv2, num_filters=512,
                         filter_size=[5, 5], stride=[2, 2], pad=(2, 2),
                         W=initmethod(0.02), nonlinearity=lrelu(0.2),
                         name='enc_conv3'), name='bnorm3')
    l_enc_conv4 = BN(C2D(incoming=l_enc_conv3, num_filters=1024,
                         filter_size=[5, 5], stride=[2, 2], pad=(2, 2),
                         W=initmethod(0.02), nonlinearity=lrelu(0.2),
                         name='enc_conv4'), name='bnorm4')
    l_enc_fc1 = BN(DL(incoming=l_enc_conv4, num_units=1000,
                      W=initmethod(0.02), nonlinearity=elu, name='enc_fc1'),
                   name='bnorm_enc_fc1')
    l_enc_mu, l_enc_logsigma = [
        BN(DL(incoming=l_enc_fc1, num_units=cfg['num_latents'],
              nonlinearity=None, name='enc_mu'), name='mu_bnorm'),
        BN(DL(incoming=l_enc_fc1, num_units=cfg['num_latents'],
              nonlinearity=None, name='enc_logsigma'), name='ls_bnorm')
    ]
    l_Z = GaussianSampleLayer(l_enc_mu, l_enc_logsigma, name='l_Z')
    l_dec_fc2 = BN(DL(incoming=l_Z, num_units=1024 * 16, nonlinearity=relu,
                      W=initmethod(0.02), name='l_dec_fc2'),
                   name='bnorm_dec_fc2')
    l_unflatten = lasagne.layers.ReshapeLayer(
        incoming=l_dec_fc2,
        shape=([0], 1024, 4, 4),
    )
    l_dec_conv1 = BN(DeconvLayer(incoming=l_unflatten, num_filters=512,
                                 filter_size=[5, 5], stride=[2, 2],
                                 crop=(2, 2), W=initmethod(0.02),
                                 nonlinearity=relu, name='dec_conv1'),
                     name='bnorm_dc1')
    l_dec_conv2 = BN(DeconvLayer(incoming=l_dec_conv1, num_filters=256,
                                 filter_size=[5, 5], stride=[2, 2],
                                 crop=(2, 2), W=initmethod(0.02),
                                 nonlinearity=relu, name='dec_conv2'),
                     name='bnorm_dc2')
    l_dec_conv3 = BN(DeconvLayer(incoming=l_dec_conv2, num_filters=128,
                                 filter_size=[5, 5], stride=[2, 2],
                                 crop=(2, 2), W=initmethod(0.02),
                                 nonlinearity=relu, name='dec_conv3'),
                     name='bnorm_dc3')
    l_out = DeconvLayer(incoming=l_dec_conv3, num_filters=3,
                        filter_size=[5, 5], stride=[2, 2], crop=(2, 2),
                        W=initmethod(0.02), b=None,
                        nonlinearity=lasagne.nonlinearities.tanh,
                        name='dec_out')
    minibatch_discrim = MinibatchLayer(
        lasagne.layers.GlobalPoolLayer(l_enc_conv4),
        num_kernels=500, name='minibatch_discrim')
    l_discrim = DL(incoming=minibatch_discrim, num_units=1,
                   nonlinearity=lasagne.nonlinearities.sigmoid, b=None,
                   W=initmethod(), name='discrimi')

    return {
        'l_in': l_in,
        'l_out': l_out,
        'l_mu': l_enc_mu,
        'l_ls': l_enc_logsigma,
        'l_latents': l_Z,
        'l_introspect': [l_enc_conv1, l_enc_conv2, l_enc_conv3, l_enc_conv4],
        'l_discrim': l_discrim
    }
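# For reference, the closed-form KL term that l_mu / l_ls above would feed
# during training; the actual loss lives elsewhere in this repo, so treat
# this as a sketch:
def gaussian_kl(mu, logsigma):
    # KL(N(mu, sigma^2) || N(0, I)), summed over latents, averaged over batch
    return T.mean(T.sum(
        0.5 * (T.sqr(mu) + T.exp(2 * logsigma) - 2 * logsigma - 1), axis=1))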
def get_model(dnn=True):
    if dnn:
        import lasagne.layers.dnn
        from lasagne.layers.dnn import Conv2DDNNLayer as C2D
        from theano.sandbox.cuda.basic_ops import (
            as_cuda_ndarray_variable, host_from_gpu, gpu_contiguous,
            HostFromGpu, gpu_alloc_empty)
        from theano.sandbox.cuda.dnn import (
            GpuDnnConvDesc, GpuDnnConv, GpuDnnConvGradI, dnn_conv, dnn_pool)
        from layers import DeconvLayer
    else:
        import lasagne.layers
        from lasagne.layers import Conv2DLayer as C2D

    dims, n_channels, n_classes = (tuple(cfg['dims']), cfg['n_channels'],
                                   cfg['n_classes'])
    shape = (None, n_channels) + dims
    l_in = lasagne.layers.InputLayer(shape=shape)
    l_enc_conv1 = C2D(
        incoming=l_in,
        num_filters=128,
        filter_size=[5, 5],
        stride=[2, 2],
        pad=(2, 2),
        W=initmethod(0.02),
        nonlinearity=lrelu(0.2),
        flip_filters=False,
        name='enc_conv1')
    l_enc_conv2 = BN(C2D(
        incoming=l_enc_conv1,
        num_filters=256,
        filter_size=[5, 5],
        stride=[2, 2],
        pad=(2, 2),
        W=initmethod(0.02),
        nonlinearity=lrelu(0.2),
        flip_filters=False,
        name='enc_conv2'), name='bnorm2')
    l_enc_conv3 = BN(C2D(
        incoming=l_enc_conv2,
        num_filters=512,
        filter_size=[5, 5],
        stride=[2, 2],
        pad=(2, 2),
        W=initmethod(0.02),
        nonlinearity=lrelu(0.2),
        flip_filters=False,
        name='enc_conv3'), name='bnorm3')
    l_enc_conv4 = BN(C2D(
        incoming=l_enc_conv3,
        num_filters=1024,
        filter_size=[5, 5],
        stride=[2, 2],
        pad=(2, 2),
        W=initmethod(0.02),
        nonlinearity=lrelu(0.2),
        flip_filters=False,
        name='enc_conv4'), name='bnorm4')
    l_enc_fc1 = BN(DL(
        incoming=l_enc_conv4,
        num_units=1000,
        W=initmethod(0.02),
        nonlinearity=elu,
        name='enc_fc1'), name='bnorm_enc_fc1')
    l_enc_mu, l_enc_logsigma = [
        BN(DL(incoming=l_enc_fc1, num_units=cfg['num_latents'],
              nonlinearity=None, name='enc_mu'), name='mu_bnorm'),
        BN(DL(incoming=l_enc_fc1, num_units=cfg['num_latents'],
              nonlinearity=None, name='enc_logsigma'), name='ls_bnorm')
    ]
    l_Z = GaussianSampleLayer(l_enc_mu, l_enc_logsigma, name='l_Z')
    l_dec_fc2 = BN(DL(
        incoming=l_Z,
        num_units=1024 * 16,
        nonlinearity=relu,
        W=initmethod(0.02),
        name='l_dec_fc2'), name='bnorm_dec_fc2')
    l_unflatten = lasagne.layers.ReshapeLayer(
        incoming=l_dec_fc2,
        shape=([0], 1024, 4, 4),
    )
    if dnn:
        l_dec_conv1 = BN(DeconvLayer(
            incoming=l_unflatten,
            num_filters=512,
            filter_size=[5, 5],
            stride=[2, 2],
            crop=(2, 2),
            W=initmethod(0.02),
            nonlinearity=relu,
            name='dec_conv1'), name='bnorm_dc1')
        l_dec_conv2 = BN(DeconvLayer(
            incoming=l_dec_conv1,
            num_filters=256,
            filter_size=[5, 5],
            stride=[2, 2],
            crop=(2, 2),
            W=initmethod(0.02),
            nonlinearity=relu,
            name='dec_conv2'), name='bnorm_dc2')
        l_dec_conv3 = BN(DeconvLayer(
            incoming=l_dec_conv2,
            num_filters=128,
            filter_size=[5, 5],
            stride=[2, 2],
            crop=(2, 2),
            W=initmethod(0.02),
            nonlinearity=relu,
            name='dec_conv3'), name='bnorm_dc3')
        l_out = DeconvLayer(
            incoming=l_dec_conv3,
            num_filters=3,
            filter_size=[5, 5],
            stride=[2, 2],
            crop=(2, 2),
            W=initmethod(0.02),
            b=None,
            nonlinearity=lasagne.nonlinearities.tanh,
            name='dec_out')
    else:
        # TC2D with stride 2 and crop 1 yields one extra row and column, which
        # the SL slices trim off (see the shape note after this function)
        l_dec_conv1 = SL(SL(BN(TC2D(
            incoming=l_unflatten,
            num_filters=512,
            filter_size=[5, 5],
            stride=[2, 2],
            crop=(1, 1),
            W=initmethod(0.02),
            nonlinearity=relu,
            name='dec_conv1'), name='bnorm_dc1'),
            indices=slice(1, None), axis=2), indices=slice(1, None), axis=3)
        l_dec_conv2 = SL(SL(BN(TC2D(
            incoming=l_dec_conv1,
            num_filters=256,
            filter_size=[5, 5],
            stride=[2, 2],
            crop=(1, 1),
            W=initmethod(0.02),
            nonlinearity=relu,
            name='dec_conv2'), name='bnorm_dc2'),
            indices=slice(1, None), axis=2), indices=slice(1, None), axis=3)
        l_dec_conv3 = SL(SL(BN(TC2D(
            incoming=l_dec_conv2,
            num_filters=128,
            filter_size=[5, 5],
            stride=[2, 2],
            crop=(1, 1),
            W=initmethod(0.02),
            nonlinearity=relu,
            name='dec_conv3'), name='bnorm_dc3'),
            indices=slice(1, None), axis=2), indices=slice(1, None), axis=3)
        l_out = SL(SL(TC2D(
            incoming=l_dec_conv3,
            num_filters=3,
            filter_size=[5, 5],
            stride=[2, 2],
            crop=(1, 1),
            W=initmethod(0.02),
            b=None,
            nonlinearity=lasagne.nonlinearities.tanh,
            name='dec_out'),
            indices=slice(1, None), axis=2), indices=slice(1, None), axis=3)
        # l_in, num_filters=1, filter_size=[5,5], stride=[2,2], crop=[1,1],
        # W=dc.W, b=None, nonlinearity=None)

    minibatch_discrim = MinibatchLayer(
        lasagne.layers.GlobalPoolLayer(l_enc_conv4),
        num_kernels=500, name='minibatch_discrim')
    l_discrim = DL(incoming=minibatch_discrim,
                   num_units=1,
                   nonlinearity=lasagne.nonlinearities.sigmoid,
                   b=None,
                   W=initmethod(),
                   name='discrimi')

    return {'l_in': l_in,
            'l_out': l_out,
            'l_mu': l_enc_mu,
            'l_ls': l_enc_logsigma,
            'l_Z': l_Z,
            'l_introspect': [l_enc_conv1, l_enc_conv2, l_enc_conv3,
                             l_enc_conv4],
            'l_discrim': l_discrim}
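# Shape note for the non-dnn branch above: a transposed convolution maps
# n -> stride * (n - 1) + filter - 2 * crop = 2 * (n - 1) + 5 - 2 = 2n + 1,
# one row/column too many, so each output is sliced from index 1 on both
# spatial axes to land exactly on 2n. Quick check (illustrative only):
_tc = lasagne.layers.TransposedConv2DLayer(
    lasagne.layers.InputLayer((None, 1024, 4, 4)),
    num_filters=512, filter_size=[5, 5], stride=[2, 2], crop=(1, 1))
assert lasagne.layers.get_output_shape(_tc)[2:] == (9, 9)  # sliced to (8, 8)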
def create_svhn_model(self, X, Y, n_dim, n_out, n_chan=1):
    # params
    n_lat = 100  # latent stochastic variables
    n_out = n_dim * n_dim * n_chan  # total dimensionality of output
    hid_nl = lasagne.nonlinearities.rectify

    # create the encoder network
    l_q_in = lasagne.layers.InputLayer(shape=(None, n_chan, n_dim, n_dim),
                                       input_var=X)
    l_q_conv1 = lasagne.layers.Conv2DLayer(
        l_q_in, num_filters=128, filter_size=(5, 5), stride=2,
        nonlinearity=lasagne.nonlinearities.LeakyRectify(0.2),
        pad='same', W=lasagne.init.Normal(5e-2))
    l_q_conv2 = nn.batch_norm(lasagne.layers.Conv2DLayer(
        l_q_conv1, num_filters=256, filter_size=(5, 5), stride=2,
        nonlinearity=lasagne.nonlinearities.LeakyRectify(0.2),
        pad='same', W=lasagne.init.Normal(5e-2)), g=None)
    l_q_conv3 = nn.batch_norm(lasagne.layers.Conv2DLayer(
        l_q_conv2, num_filters=512, filter_size=(5, 5), stride=2,
        nonlinearity=lasagne.nonlinearities.LeakyRectify(0.2),
        pad='same', W=lasagne.init.Normal(5e-2)), g=None)
    l_q_mu = lasagne.layers.DenseLayer(l_q_conv3, num_units=n_lat,
                                       nonlinearity=None,
                                       W=lasagne.init.Normal(5e-2))
    l_q_logsigma = lasagne.layers.DenseLayer(l_q_conv3, num_units=n_lat,
                                             nonlinearity=None,
                                             W=lasagne.init.Normal(5e-2))

    # create the decoder network
    l_p_z = GaussianSampleLayer(l_q_mu, l_q_logsigma)
    l_p_hid1 = nn.batch_norm(lasagne.layers.DenseLayer(
        l_p_z, num_units=4 * 4 * 512, nonlinearity=hid_nl,
        W=lasagne.init.Normal(5e-2)), g=None)
    l_p_hid1 = lasagne.layers.ReshapeLayer(l_p_hid1, (-1, 512, 4, 4))
    l_p_hid2 = nn.batch_norm(nn.Deconv2DLayer(
        l_p_hid1, (self.n_batch, 256, 8, 8), (5, 5),
        W=lasagne.init.Normal(0.05), nonlinearity=hid_nl), g=None)
    l_p_hid3 = nn.batch_norm(nn.Deconv2DLayer(
        l_p_hid2, (self.n_batch, 128, 16, 16), (5, 5),
        W=lasagne.init.Normal(0.05), nonlinearity=hid_nl), g=None)
    l_p_mu = nn.weight_norm(nn.Deconv2DLayer(
        l_p_hid3, (self.n_batch, 3, 32, 32), (5, 5),
        W=lasagne.init.Normal(0.05),
        nonlinearity=lasagne.nonlinearities.sigmoid),
        train_g=True, init_stdv=0.1)
    l_p_logsigma = nn.weight_norm(nn.Deconv2DLayer(
        l_p_hid3, (self.n_batch, 3, 32, 32), (5, 5),
        W=lasagne.init.Normal(0.05), nonlinearity=None),
        train_g=True, init_stdv=0.1)
    l_sample = GaussianSampleLayer(l_p_mu, l_p_logsigma)

    return l_p_mu, l_p_logsigma, l_q_mu, l_q_logsigma, l_sample, l_p_z
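# Hedged usage sketch: because create_svhn_model returns l_p_z (the
# GaussianSampleLayer), images can be drawn from the prior by overriding that
# layer with z ~ N(0, I) through lasagne.layers.get_output's inputs dict.
# Note that the nn.Deconv2DLayer target shapes above pin the decoder graph to
# exactly self.n_batch rows. The helper below is illustrative, not part of
# the repo:
def make_prior_sampler(l_p_mu, l_p_z):
    # feed z ~ N(0, I) of shape (n_batch, n_lat) to the returned function
    Z = T.matrix('Z')
    img_mu = lasagne.layers.get_output(l_p_mu, {l_p_z: Z},
                                       deterministic=True)
    return theano.function([Z], img_mu)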
def create_deconv_model(self, X, Y, n_dim, n_out, n_chan=1):
    # params
    n_lat = 100  # latent stochastic variables
    n_out = n_dim * n_dim * n_chan  # total dimensionality of output
    hid_nl = lasagne.nonlinearities.rectify
    safe_nl = lambda av: T.clip(av, -7, 1)  # for numerical stability

    # create the encoder network
    l_q_in = lasagne.layers.InputLayer(shape=(None, n_chan, n_dim, n_dim),
                                       input_var=X)
    l_q_conv1 = weight_norm(lasagne.layers.Conv2DLayer(
        l_q_in, num_filters=128, filter_size=(5, 5), stride=2,
        nonlinearity=lasagne.nonlinearities.LeakyRectify(0.2),
        pad='same', W=lasagne.init.Normal(5e-2)))
    l_q_conv2 = weight_norm(lasagne.layers.Conv2DLayer(
        l_q_conv1, num_filters=256, filter_size=(5, 5), stride=2,
        nonlinearity=lasagne.nonlinearities.LeakyRectify(0.2),
        pad='same', W=lasagne.init.Normal(5e-2)))
    l_q_conv3 = weight_norm(lasagne.layers.Conv2DLayer(
        l_q_conv2, num_filters=512, filter_size=(5, 5), stride=2,
        nonlinearity=lasagne.nonlinearities.LeakyRectify(0.2),
        pad='same', W=lasagne.init.Normal(5e-2)))
    l_q_mu = weight_norm(lasagne.layers.DenseLayer(
        l_q_conv3, num_units=n_lat, nonlinearity=None,
        W=lasagne.init.Normal(5e-2)))
    l_q_logsigma = weight_norm(lasagne.layers.DenseLayer(
        l_q_conv3, num_units=n_lat, nonlinearity=safe_nl,
        W=lasagne.init.Normal(5e-2)))

    # create the decoder network
    l_p_z = GaussianSampleLayer(l_q_mu, l_q_logsigma)
    l_p_hid1 = weight_norm(lasagne.layers.DenseLayer(
        l_p_z, num_units=4 * 4 * 512, nonlinearity=hid_nl,
        W=lasagne.init.Normal(5e-2)))
    l_p_hid1 = lasagne.layers.ReshapeLayer(l_p_hid1, (-1, 512, 4, 4))
    # upsample with nearest-neighbor upscaling followed by 'same' convolutions
    l_p_hid2 = lasagne.layers.Upscale2DLayer(l_p_hid1, 2)
    l_p_hid2 = weight_norm(lasagne.layers.Conv2DLayer(
        l_p_hid2, num_filters=256, filter_size=(5, 5), pad='same',
        nonlinearity=hid_nl))
    l_p_hid3 = lasagne.layers.Upscale2DLayer(l_p_hid2, 2)
    l_p_hid3 = weight_norm(lasagne.layers.Conv2DLayer(
        l_p_hid3, num_filters=128, filter_size=(5, 5), pad='same',
        nonlinearity=hid_nl))
    l_p_up = lasagne.layers.Upscale2DLayer(l_p_hid3, 2)
    l_p_mu = lasagne.layers.flatten(weight_norm(lasagne.layers.Conv2DLayer(
        l_p_up, num_filters=3, filter_size=(5, 5), pad='same',
        nonlinearity=lasagne.nonlinearities.sigmoid)))
    l_p_logsigma = lasagne.layers.flatten(weight_norm(
        lasagne.layers.Conv2DLayer(
            l_p_up, num_filters=3, filter_size=(5, 5), pad='same',
            nonlinearity=safe_nl)))
    l_sample = GaussianSampleLayer(l_p_mu, l_p_logsigma)

    return l_p_mu, l_p_logsigma, l_q_mu, l_q_logsigma, l_sample, l_p_z
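# For reference, the diagonal-Gaussian reconstruction term that pairs with
# l_p_mu / l_p_logsigma above; the actual loss lives elsewhere in this repo,
# so treat this as a sketch:
def gaussian_log_likelihood(x, mu, logsigma):
    # log N(x; mu, diag(sigma^2)), summed over pixels, averaged over batch
    return T.mean(T.sum(
        -0.5 * np.log(2 * np.pi) - logsigma
        - 0.5 * T.sqr(x - mu) / T.exp(2 * logsigma), axis=1))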
def create_dadgm_model(self, X, Y, n_dim, n_out, n_chan=1, n_class=10):
    n_cat = 20  # number of categorical distributions
    n_lat = n_class * n_cat  # latent stochastic variables
    n_aux = 10  # number of auxiliary variables
    n_hid = 500  # size of hidden layer in encoder/decoder
    n_in = n_out = n_dim * n_dim * n_chan
    tau = self.tau
    hid_nl = T.nnet.relu
    relu_shift = lambda av: T.nnet.relu(av + 10) - 10

    # create the encoder network
    # - create q(a|x)
    qa_net_in = InputLayer(shape=(None, n_in), input_var=X)
    qa_net = DenseLayer(
        qa_net_in, num_units=n_hid,
        W=GlorotNormal('relu'), b=Normal(1e-3),
        nonlinearity=hid_nl,
    )
    qa_net_mu = DenseLayer(
        qa_net, num_units=n_aux,
        W=GlorotNormal(), b=Normal(1e-3),
        nonlinearity=None,
    )
    qa_net_logsigma = DenseLayer(
        qa_net, num_units=n_aux,
        W=GlorotNormal(), b=Normal(1e-3),
        nonlinearity=relu_shift,
    )
    qa_net_sample = GaussianSampleLayer(qa_net_mu, qa_net_logsigma)

    # - create q(z|a, x)
    qz_net_in = lasagne.layers.InputLayer((None, n_aux))
    qz_net_a = DenseLayer(
        qz_net_in, num_units=n_hid,
        nonlinearity=hid_nl,
    )
    qz_net_b = DenseLayer(
        qa_net_in, num_units=n_hid,
        nonlinearity=hid_nl,
    )
    qz_net = ElemwiseSumLayer([qz_net_a, qz_net_b])
    qz_net = DenseLayer(qz_net, num_units=n_hid, nonlinearity=hid_nl)
    qz_net_mu = DenseLayer(
        qz_net, num_units=n_lat,
        nonlinearity=None,
    )
    qz_net_mu = reshape(qz_net_mu, (-1, n_class))
    qz_net_sample = GumbelSoftmaxSampleLayer(qz_net_mu, tau)
    qz_net_sample = reshape(qz_net_sample, (-1, n_cat, n_class))

    # create the decoder network
    # - create p(x|z)
    px_net_in = lasagne.layers.InputLayer((None, n_cat, n_class))
    # --- rest is created from RBM ---

    # - create p(a|z)
    pa_net = DenseLayer(
        flatten(px_net_in), num_units=n_hid,
        W=GlorotNormal('relu'), b=Normal(1e-3),
        nonlinearity=hid_nl,
    )
    pa_net_mu = DenseLayer(
        pa_net, num_units=n_aux,
        W=GlorotNormal(), b=Normal(1e-3),
        nonlinearity=None,
    )
    pa_net_logsigma = DenseLayer(
        pa_net, num_units=n_aux,
        W=GlorotNormal(), b=Normal(1e-3),
        nonlinearity=relu_shift,
    )

    # save network params
    self.n_cat = n_cat
    self.input_layers = (qa_net_in, qz_net_in, px_net_in)

    return pa_net_mu, pa_net_logsigma, qz_net_mu, \
        qa_net_mu, qa_net_logsigma, qz_net_sample, qa_net_sample
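# GumbelSoftmaxSampleLayer is imported from elsewhere in this repo. A minimal
# sketch of the Gumbel-softmax (Concrete) relaxation it is assumed to
# implement: perturb the logits with Gumbel(0, 1) noise and push the result
# through a temperature-tau softmax. Illustrative only:
from theano.sandbox.rng_mrg import MRG_RandomStreams

class GumbelSoftmaxSampleLayerSketch(lasagne.layers.Layer):
    def __init__(self, logits, tau, rng=None, eps=1e-20, **kwargs):
        self.tau, self.eps = tau, eps
        self.rng = rng if rng else MRG_RandomStreams(seed=1234)
        super(GumbelSoftmaxSampleLayerSketch, self).__init__(logits, **kwargs)

    def get_output_for(self, logits, **kwargs):
        u = self.rng.uniform(logits.shape)
        g = -T.log(-T.log(u + self.eps) + self.eps)  # Gumbel(0, 1) noise
        return T.nnet.softmax((logits + g) / self.tau)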
def create_model(self, X, Y, n_dim, n_out, n_chan=1):
    # params
    n_lat = 200  # latent stochastic variables
    n_aux = 10  # auxiliary variables
    n_hid = 499  # size of hidden layer in encoder/decoder
    n_sam = self.n_sample  # number of monte-carlo samples
    n_out = n_dim * n_dim * n_chan  # total dimensionality of output
    hid_nl = lasagne.nonlinearities.rectify
    relu_shift = lambda av: T.nnet.relu(av + 10) - 10  # for numerical stability

    # create the encoder network
    # create q(a|x)
    l_qa_in = lasagne.layers.InputLayer(
        shape=(None, n_chan, n_dim, n_dim),
        input_var=X,
    )
    l_qa_hid = lasagne.layers.DenseLayer(
        l_qa_in, num_units=n_hid,
        W=lasagne.init.GlorotNormal('relu'),
        b=lasagne.init.Normal(1e-3),
        nonlinearity=hid_nl,
    )
    l_qa_mu = lasagne.layers.DenseLayer(
        l_qa_hid, num_units=n_aux,
        W=lasagne.init.GlorotNormal(),
        b=lasagne.init.Normal(1e-3),
        nonlinearity=None,
    )
    l_qa_logsigma = lasagne.layers.DenseLayer(
        l_qa_hid, num_units=n_aux,
        W=lasagne.init.GlorotNormal(),
        b=lasagne.init.Normal(1e-3),
        nonlinearity=relu_shift,
    )

    # repeatedly sample
    l_qa_mu = lasagne.layers.ReshapeLayer(
        RepeatLayer(l_qa_mu, n_ax=1, n_rep=n_sam),
        shape=(-1, n_aux),
    )
    l_qa_logsigma = lasagne.layers.ReshapeLayer(
        RepeatLayer(l_qa_logsigma, n_ax=1, n_rep=n_sam),
        shape=(-1, n_aux),
    )
    l_qa = GaussianSampleLayer(l_qa_mu, l_qa_logsigma)

    # create q(z|a,x)
    l_qz_hid1a = lasagne.layers.DenseLayer(
        l_qa, num_units=n_hid,
        W=lasagne.init.GlorotNormal('relu'),
        b=lasagne.init.Normal(1e-3),
        nonlinearity=hid_nl,
    )
    l_qz_hid1b = lasagne.layers.DenseLayer(
        l_qa_in, num_units=n_hid,
        W=lasagne.init.GlorotNormal('relu'),
        b=lasagne.init.Normal(1e-3),
        nonlinearity=hid_nl,
    )
    l_qz_hid1b = lasagne.layers.ReshapeLayer(
        RepeatLayer(l_qz_hid1b, n_ax=1, n_rep=n_sam),
        shape=(-1, n_hid),
    )
    l_qz_hid2 = lasagne.layers.ElemwiseSumLayer([l_qz_hid1a, l_qz_hid1b])
    l_qz_hid2 = lasagne.layers.NonlinearityLayer(l_qz_hid2, hid_nl)
    l_qz_mu = lasagne.layers.DenseLayer(
        l_qz_hid2, num_units=n_lat,
        W=lasagne.init.GlorotNormal(),
        b=lasagne.init.Normal(1e-3),
        nonlinearity=None,
    )
    l_qz_logsigma = lasagne.layers.DenseLayer(
        l_qz_hid2, num_units=n_lat,
        W=lasagne.init.GlorotNormal(),
        b=lasagne.init.Normal(1e-3),
        nonlinearity=relu_shift,
    )
    l_qz = GaussianSampleLayer(l_qz_mu, l_qz_logsigma)

    # create the decoder network
    # create p(x|z)
    l_px_in = lasagne.layers.InputLayer((None, n_lat))
    l_px_hid = lasagne.layers.DenseLayer(
        l_px_in, num_units=n_hid,
        W=lasagne.init.GlorotNormal('relu'),
        b=lasagne.init.Normal(1e-3),
        nonlinearity=hid_nl,
    )
    l_px_mu, l_px_logsigma = None, None
    if self.model == 'bernoulli':
        l_px_mu = lasagne.layers.DenseLayer(
            l_px_hid, num_units=n_out,
            nonlinearity=lasagne.nonlinearities.sigmoid,
            W=lasagne.init.GlorotUniform(),
            b=lasagne.init.Normal(1e-3),
        )
    elif self.model == 'gaussian':
        l_px_mu = lasagne.layers.DenseLayer(
            l_px_hid, num_units=n_out,
            nonlinearity=None,
        )
        l_px_logsigma = lasagne.layers.DenseLayer(
            l_px_hid, num_units=n_out,
            nonlinearity=relu_shift,
        )

    # create p(a|z)
    l_pa_hid = lasagne.layers.DenseLayer(
        l_px_in, num_units=n_hid,
        nonlinearity=hid_nl,
        W=lasagne.init.GlorotNormal('relu'),
        b=lasagne.init.Normal(1e-3),
    )
    l_pa_mu = lasagne.layers.DenseLayer(
        l_pa_hid, num_units=n_aux,
        W=lasagne.init.GlorotNormal(),
        b=lasagne.init.Normal(1e-3),
        nonlinearity=None,
    )
    l_pa_logsigma = lasagne.layers.DenseLayer(
        l_pa_hid, num_units=n_aux,
        W=lasagne.init.GlorotNormal(),
        b=lasagne.init.Normal(1e-3),
        nonlinearity=relu_shift,
    )

    self.input_layers = (l_qa_in, l_px_in)
    self.n_lat = n_lat
    self.n_hid = n_hid

    return l_px_mu, l_px_logsigma, l_pa_mu, l_pa_logsigma, \
        l_qz_mu, l_qz_logsigma, \
        l_qa_mu, l_qa_logsigma, \
        l_qa, l_qz
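# Hedged usage sketch: downstream code can stitch the encoder and decoder back
# together through self.input_layers, e.g. scoring p(x|z) at inferred latents.
# The helper below is illustrative, not part of the repo (l_qa_in already
# carries X via input_var, so no explicit input is needed for the encoder):
def decode_inferred_latents(model, l_qz, l_px_mu):
    _, l_px_in = model.input_layers
    z = lasagne.layers.get_output(l_qz)
    return lasagne.layers.get_output(l_px_mu, {l_px_in: z})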