def __init__(self,
                 rng,
                 input_source,
                 input_target,
                 label_source,
                 batch_size,
                 struct,
                 coef,
                 train=False,
                 init_params=None):
        """Initialize the parameters for the multilayer perceptron
        :type rng: numpy.random.RandomState
        :param rng: a random number generator used to initialize weights
        :type input_source: theano.tensor.TensorType
        :param input: symbolic variable that describes the "Source Domain" input of the architecture (one minibatch)
        
        :type input_target: theano.tensor.TensorType
        :param input: symbolic variable that describes the "Target Domain" input of the architecture (one minibatch)        
        :type xxx_struct: class NN_struct
        :param xxx_strucat: define the structure of each NN
        """

        if train:
            # batch_size is [source batch, target batch]; scale each by the
            # sample number L so it matches the replicated minibatch below
            batch_size[0] = batch_size[0] * coef.L  # source batch size * sample num
            batch_size[1] = batch_size[1] * coef.L  # target batch size * sample num
            tmp_S = input_source
            tmp_T = input_target
            tmp_l = label_source
            for i in range(coef.L - 1):  # L - 1 extra copies; the original counts as the first
                # T is theano.tensor: stack another copy of each symbolic
                # minibatch along axis 0
                tmp_S = T.concatenate([tmp_S, input_source], axis=0)
                tmp_T = T.concatenate([tmp_T, input_target], axis=0)
                tmp_l = T.concatenate([tmp_l, label_source], axis=0)
            input_source = tmp_S
            input_target = tmp_T
            label_source = tmp_l
            L = coef.L  # L = sample number

        # if testing
        else:
            L = 1

        self.L = L
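        # Note: the training-mode loop above replicates each symbolic minibatch
        # L times along axis 0, so every input row is paired with L independent
        # draws from the sampling layers (a Monte Carlo estimate of the ELBO
        # expectations). Assuming 2-D inputs, T.tile(input_source, (L, 1)) would
        # be an equivalent one-liner.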

        self.struct = struct
        # encoding
        encoder1_struct = struct.encoder1
        encoder2_struct = struct.encoder2
        encoder3_struct = struct.encoder3  # bottleneck
        # decoding
        decoder1_struct = struct.decoder1
        decoder2_struct = struct.decoder2

        alpha = coef.alpha  # classification loss weight
        beta = coef.beta  # MMD weight
        chi = coef.chi  # reconstruction weight
        D = coef.D  # number of random features for fast MMD
        optimize = coef.optimize

        # if no initial parameters are given, fall back to fresh defaults
        if init_params is None:
            init_params = VFAE_params()

        #------------------------------------------------------------------------
        #Encoder 1 Neural Network: present q_\phi({z_y}_n | x_n, d_n)

        # Build a 2-class one-hot domain indicator d_n for every example:
        # source rows get [0, 1], target rows get [1, 0]. The indicator is
        # appended to x so the encoder is conditioned on the domain.
        zero_v_S = T.zeros([batch_size[0], 1], dtype=theano.config.floatX)  # (source batch, 1) zeros
        zero_v_T = T.zeros([batch_size[1], 1], dtype=theano.config.floatX)  # (target batch, 1) zeros
        one_v_S = T.ones([batch_size[0], 1], dtype=theano.config.floatX)  # (source batch, 1) ones
        one_v_T = T.ones([batch_size[1], 1], dtype=theano.config.floatX)  # (target batch, 1) ones

        d_source = T.concatenate([zero_v_S, one_v_S], axis=1)  # source domain code [0, 1]
        xd_source = T.concatenate([input_source, d_source], axis=1)  # [x_source | d_source]
        d_target = T.concatenate([one_v_T, zero_v_T], axis=1)  # target domain code [1, 0]
        xd_target = T.concatenate([input_target, d_target], axis=1)  # [x_target | d_target]
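        # Illustration (assuming 2-D inputs of width x_dim): xd_source then has
        # shape (batch_size[0], x_dim + 2) and xd_target (batch_size[1], x_dim + 2)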

        self.Encoder1 = nn.Gaussian_MLP(
            rng=rng,  # numpy random state
            input_source=xd_source,  # source input with domain code appended
            input_target=xd_target,  # target input with domain code appended
            struct=encoder1_struct,
            batch_size=batch_size,
            params=init_params.EC1_params,
            name='Encoder1')

        zy_dim = encoder1_struct.mu.layer_dim[-1]
        self.EC_zy_S_mu = self.Encoder1.S_mu
        self.EC_zy_S_log_sigma = self.Encoder1.S_log_sigma
        self.EC_zy_S_sigma = T.exp(self.EC_zy_S_log_sigma)
        self.EC_zy_T_mu = self.Encoder1.T_mu
        self.EC_zy_T_log_sigma = self.Encoder1.T_log_sigma
        self.EC_zy_T_sigma = T.exp(self.EC_zy_T_log_sigma)

        self.zy_S = self.Encoder1.S_output
        self.zy_T = self.Encoder1.T_output

        self.Encoder1_params = self.Encoder1.params  # get parameters from params list
        self.Encoder1_learning_rate = self.Encoder1.learning_rate
        self.Encoder1_decay = self.Encoder1.decay

        #------------------------------------------------------------------------
        #Encoder 3 Neural Network: present q_\phi(y_n | {z_y}_n)
        self.Encoder3_pi = nn.NN_Block(rng=rng,
                                       input_source=self.zy_S,
                                       input_target=self.zy_T,
                                       struct=encoder3_struct,
                                       params=init_params.EC3_params,
                                       name='Encoder3_pi')

        #Sample layer
        self.EC_3_CSL_target = nn.CatSampleLayer(
            pi=self.Encoder3_pi.output_target,
            n_in=encoder3_struct.layer_dim[-1],
            batch_size=batch_size[1])
        y_dim = encoder3_struct.layer_dim[-1]
        self.EC_y_S_pi = self.Encoder3_pi.output_source
        self.EC_y_T_pi = self.Encoder3_pi.output_target

        self.y_T = self.EC_3_CSL_target.output
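        # y_T is a sampled one-hot label for the (unlabeled) target minibatch;
        # it stands in for label_source wherever the target branch needs labels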

        self.Encoder3_params = self.Encoder3_pi.params
        self.Encoder3_learning_rate = self.Encoder3_pi.learning_rate
        self.Encoder3_decay = self.Encoder3_pi.decay

        #------------------------------------------------------------------------
        #Encoder 2 Neural Network: present q_\phi({a_y}_n | {z_y}_n, y_n)
        #Input Append
        zyy_source = T.concatenate([self.zy_S, label_source], axis=1)
        zyy_target = T.concatenate([self.zy_T, self.y_T], axis=1)

        self.Encoder2 = nn.Gaussian_MLP(rng=rng,
                                        input_source=zyy_source,
                                        input_target=zyy_target,
                                        struct=encoder2_struct,
                                        batch_size=batch_size,
                                        params=init_params.EC2_params,
                                        name='Encoder2')

        ay_dim = encoder2_struct.mu.layer_dim[-1]
        self.EC_ay_S_mu = self.Encoder2.S_mu
        self.EC_ay_S_log_sigma = self.Encoder2.S_log_sigma
        self.EC_ay_S_sigma = T.exp(self.EC_ay_S_log_sigma)
        self.EC_ay_T_mu = self.Encoder2.T_mu
        self.EC_ay_T_log_sigma = self.Encoder2.T_log_sigma
        self.EC_ay_T_sigma = T.exp(self.EC_ay_T_log_sigma)

        self.ay_S = self.Encoder2.S_output
        self.ay_T = self.Encoder2.T_output

        self.Encoder2_params = self.Encoder2.params
        self.Encoder2_learning_rate = self.Encoder2.learning_rate
        self.Encoder2_decay = self.Encoder2.decay

        #------------------------------------------------------------------------
        #Decoder 1 Neural Network: present p_\theta(x_n | {z_y}_n, d_n)
        zyd_source = T.concatenate([self.zy_S, d_source], axis=1)
        zyd_target = T.concatenate([self.zy_T, d_target], axis=1)

        self.Decoder1 = nn.Gaussian_MLP(rng=rng,
                                        input_source=zyd_source,
                                        input_target=zyd_target,
                                        struct=decoder1_struct,
                                        batch_size=batch_size,
                                        params=init_params.DC1_params,
                                        name='Decoder1')

        x_dim = decoder1_struct.mu.layer_dim[-1]
        self.DC_x_S_mu = self.Decoder1.S_mu
        self.DC_x_S_log_sigma = self.Decoder1.S_log_sigma
        self.DC_x_S_sigma = T.exp(self.DC_x_S_log_sigma)
        self.DC_x_T_mu = self.Decoder1.T_mu
        self.DC_x_T_log_sigma = self.Decoder1.T_log_sigma
        self.DC_x_T_sigma = T.exp(self.DC_x_T_log_sigma)

        self.Decoder1_params = self.Decoder1.params
        self.Decoder1_learning_rate = self.Decoder1.learning_rate
        self.Decoder1_decay = self.Decoder1.decay

        #------------------------------------------------------------------------
        #Decoder 2 Neural Network: present p_\theta({z_y}_n | {a_y}_n, y_n)
        ayy_source = T.concatenate([self.ay_S, label_source], axis=1)
        ayy_target = T.concatenate([self.ay_T, self.y_T], axis=1)

        self.Decoder2 = nn.Gaussian_MLP(rng=rng,
                                        input_source=ayy_source,
                                        input_target=ayy_target,
                                        struct=decoder2_struct,
                                        batch_size=batch_size,
                                        params=init_params.DC2_params,
                                        name='Decoder2')

        self.DC_zy_S_mu = self.Decoder2.S_mu
        self.DC_zy_S_log_sigma = self.Decoder2.S_log_sigma
        self.DC_zy_S_sigma = T.exp(self.DC_zy_S_log_sigma)
        self.DC_zy_T_mu = self.Decoder2.T_mu
        self.DC_zy_T_log_sigma = self.Decoder2.T_log_sigma
        self.DC_zy_T_sigma = T.exp(self.DC_zy_T_log_sigma)

        self.Decoder2_params = self.Decoder2.params
        self.Decoder2_learning_rate = self.Decoder2.learning_rate
        self.Decoder2_decay = self.Decoder2.decay

        #------------------------------------------------------------------------
        # Error Function Set
        # KL(q(zy)||p(zy)) -----------
        self.KL_zy_source = er.KLGaussianGaussian(
            self.EC_zy_S_mu, self.EC_zy_S_log_sigma, self.DC_zy_S_mu,
            self.DC_zy_S_log_sigma).sum()
        self.KL_zy_target = er.KLGaussianGaussian(
            self.EC_zy_T_mu, self.EC_zy_T_log_sigma, self.DC_zy_T_mu,
            self.DC_zy_T_log_sigma).sum()

        # KL(q(ay)||p(ay)) -----------
        self.KL_ay_source = er.KLGaussianStdGaussian(
            self.EC_ay_S_mu, self.EC_ay_S_log_sigma).sum()
        self.KL_ay_target = er.KLGaussianStdGaussian(
            self.EC_ay_T_mu, self.EC_ay_T_log_sigma).sum()

        # KL(q(y)||p(y)) only target data-----------
        # prior of y is set to 1/K, K is category number
        threshold = 0.0000001
        pi_0 = T.ones([batch_size[1], y_dim],
                      dtype=theano.config.floatX) / y_dim
        self.KL_y_target = T.sum(
            -self.EC_y_T_pi *
            T.log(T.maximum(self.EC_y_T_pi / pi_0, threshold)),
            axis=1).sum()

        # Label log-likelihood under q(y | z_y), source data only -----------
        self.LH_y_source = -T.sum(
            -label_source * T.log(T.maximum(self.EC_y_S_pi, threshold)),
            axis=1).sum()
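        # Note: the double negation reduces to sum(label_source * log(pi)), i.e.
        # the source-label log-likelihood (negative cross-entropy), which the
        # cost below weights by alpha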

        # Reconstruction log-likelihood p(x | z_y, d), assuming a Gaussian decoder -----------
        self.LH_x_source = er.LogGaussianPDF(input_source, self.DC_x_S_mu,
                                             self.DC_x_S_log_sigma).sum()
        self.LH_x_target = er.LogGaussianPDF(input_target, self.DC_x_T_mu,
                                             self.DC_x_T_log_sigma).sum()

        # MMD between source and target z_y using a Gaussian kernel -----------
        #self.MMD = MMD(self.zy_S, self.zy_T, batch_size)
        self.MMD = er.MMDEstimator(rng, self.zy_S, self.zy_T, zy_dim,
                                   batch_size, D)

        #Cost function
        tmp = self.KL_zy_source + self.KL_zy_target + self.KL_ay_source + self.KL_ay_target \
            + self.LH_x_source*chi + self.LH_x_target*chi + self.KL_y_target + self.LH_y_source * alpha
        self.cost = -tmp / (batch_size[0] + batch_size[1]) + self.MMD * beta

        # the parameters of the model
        self.params = self.Encoder1_params + self.Encoder2_params + self.Encoder3_params + self.Decoder1_params + self.Decoder2_params
        self.learning_rate = self.Encoder1_learning_rate + self.Encoder2_learning_rate + self.Encoder3_learning_rate \
        + self.Decoder1_learning_rate + self.Decoder2_learning_rate
        self.decay = self.Encoder1_decay + self.Encoder2_decay + self.Encoder3_decay + self.Decoder1_decay + self.Decoder2_decay

        if optimize == 'Adam_update' and train:
            #Adam update function
            self.updates = nn.adam(loss=self.cost,
                                   all_params=self.params,
                                   all_learning_rate=self.learning_rate)
        elif optimize == 'SGD' and train:
            #Standard update function
            gparams = [T.grad(self.cost, param) for param in self.params]

            self.params_updates = [
                (param, param - learning_rate * gparam)
                for param, gparam, learning_rate in zip(
                    self.params, gparams, self.learning_rate)
            ]

            self.learning_rate_update = [
                (learning_rate, learning_rate * decay)
                for learning_rate, decay in zip(self.learning_rate, self.decay)
            ]

            self.updates = self.params_updates + self.learning_rate_update

        # keep track of model input
        self.input_source = input_source
        self.input_target = input_target

        #Predict Label
        self.y_pred_source = T.argmax(self.EC_y_S_pi, axis=1)
        self.y_pred_target = T.argmax(self.EC_y_T_pi, axis=1)
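
# The MMDEstimator used above takes D random features ("fast MMD"). Below is a
# minimal NumPy sketch of that idea, approximating a Gaussian-kernel MMD with D
# random Fourier features. It is an illustration only; the function name
# rff_mmd and the bandwidth argument sigma are hypothetical, not part of the
# er module.
import numpy as np

def rff_mmd(z_source, z_target, D, sigma=1.0, rng=np.random):
    """Approximate squared MMD between two samples via D random Fourier features."""
    dim = z_source.shape[1]
    W = rng.normal(0.0, 1.0 / sigma, size=(dim, D))  # spectral samples of the Gaussian kernel
    b = rng.uniform(0.0, 2.0 * np.pi, size=D)        # random phases
    phi = lambda Z: np.sqrt(2.0 / D) * np.cos(Z.dot(W) + b)
    return np.sum((phi(z_source).mean(axis=0) - phi(z_target).mean(axis=0)) ** 2)
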
Example #2
    def __init__(self, rng, input_source, input_target, label_source,
                 label_target, batch_size, encoder1_struct, encoder2_struct,
                 encoder3_struct, decoder1_struct, decoder2_struct, alpha,
                 beta, D):
        """Initialize the parameters for the multilayer perceptron

        :type rng: numpy.random.RandomState
        :param rng: a random number generator used to initialize weights

        :type input_source: theano.tensor.TensorType
        :param input_source: symbolic variable that describes the "Source Domain" input of the architecture (one minibatch)

        :type input_target: theano.tensor.TensorType
        :param input_target: symbolic variable that describes the "Target Domain" input of the architecture (one minibatch)

        :type xxx_struct: class NN_struct
        :param xxx_struct: defines the structure of the corresponding sub-network (encoder1_struct ... decoder2_struct)
        """
        #------------------------------------------------------------------------
        #Encoder 1 Neural Network: present q_\phi({z_y}_n | x_n, d_n)
        d_source = T.zeros([batch_size, 1], dtype=theano.config.floatX)
        xd_source = T.concatenate([input_source, d_source], axis=1)
        d_target = T.ones([batch_size, 1], dtype=theano.config.floatX)
        xd_target = T.concatenate([input_target, d_target], axis=1)

        self.Encoder1_mu = nn.NN_Block_0L(rng=rng,
                                          input_source=xd_source,
                                          input_target=xd_target,
                                          struct=encoder1_struct,
                                          name='Encoder1_mu')

        self.Encoder1_sigma = nn.NN_Block_0L(rng=rng,
                                             input_source=xd_source,
                                             input_target=xd_target,
                                             struct=encoder1_struct,
                                             name='Encoder1_sigma')

        #Sample layer
        self.EC_1_GSL_source = nn.GaussianSampleLayer(
            mu=self.Encoder1_mu.output_source,
            log_sigma=self.Encoder1_sigma.output_source,
            n_in=encoder1_struct.layer_dim[-1],
            batch_size=batch_size)

        self.EC_1_GSL_target = nn.GaussianSampleLayer(
            mu=self.Encoder1_mu.output_target,
            log_sigma=self.Encoder1_sigma.output_target,
            n_in=encoder1_struct.layer_dim[-1],
            batch_size=batch_size)

        zy_dim = encoder1_struct.layer_dim[-1]
        self.EC_zy_S_mu = self.Encoder1_mu.output_source
        self.EC_zy_S_log_sigma = self.Encoder1_sigma.output_source
        self.EC_zy_S_sigma = T.exp(self.EC_zy_S_log_sigma)
        self.EC_zy_T_mu = self.Encoder1_mu.output_target
        self.EC_zy_T_log_sigma = self.Encoder1_sigma.output_target
        self.EC_zy_T_sigma = T.exp(self.EC_zy_T_log_sigma)

        self.zy_S = self.EC_1_GSL_source.output
        self.zy_T = self.EC_1_GSL_target.output

        self.Encoder1_params = self.Encoder1_mu.params + self.Encoder1_sigma.params
        #self.Encoder1_outputs = [("EC_zy_S_mu", self.EC_zy_S_mu), ("EC_zy_S_log_sigma", self.EC_zy_S_log_sigma), ("zy_S", self.zy_S),
        #                        ("EC_zy_T_mu", self.EC_zy_T_mu), ("EC_zy_T_log_sigma", self.EC_zy_T_log_sigma), ("zy_T", self.zy_T)]
        self.Encoder1_outputs = [
            self.EC_zy_S_mu, self.EC_zy_S_log_sigma, self.zy_S,
            self.EC_zy_T_mu, self.EC_zy_T_log_sigma, self.zy_T
        ]
        self.Encoder1_outputs_name = [
            "EC_zy_S_mu", "EC_zy_S_log_sigma", "zy_S", "EC_zy_T_mu",
            "EC_zy_T_log_sigma", "zy_T"
        ]

        #------------------------------------------------------------------------
        #Encoder 3 Neural Network: present q_\phi(y_n | {z_y}_n)
        self.Encoder3_pi = nn.NN_Block_0L(rng=rng,
                                          input_source=self.zy_S,
                                          input_target=self.zy_T,
                                          struct=encoder3_struct,
                                          name='Encoder3_pi')

        #Sample layer
        self.EC_3_CSL_target = nn.CatSampleLayer(
            pi=self.Encoder3_pi.output_target,
            n_in=encoder3_struct.layer_dim[-1],
            batch_size=batch_size)
        y_dim = encoder3_struct.layer_dim[-1]
        self.EC_y_S_pi = self.Encoder3_pi.output_source
        self.EC_y_T_pi = self.Encoder3_pi.output_target

        self.Encoder3_params = self.Encoder3_pi.params
        #self.Encoder3_outputs = [("EC_y_S_pi",self.EC_y_S_pi), ("EC_y_T_pi",self.EC_y_T_pi), ("y_T",self.y_T)]
        self.Encoder3_outputs = [self.EC_y_S_pi, self.EC_y_T_pi]
        self.Encoder3_outputs_name = ["EC_y_S_pi", "EC_y_T_pi"]

        #------------------------------------------------------------------------
        #Encoder 2 Neural Network: present q_\phi({a_y}_n | {z_y}_n, y_n)
        #Input Append
        zyy_source = T.concatenate([self.zy_S, label_source], axis=1)
        zyy_target = T.concatenate([self.zy_T, label_target], axis=1)

        self.Encoder2_mu = nn.NN_Block_0L(rng=rng,
                                          input_source=zyy_source,
                                          input_target=zyy_target,
                                          struct=encoder2_struct,
                                          name='Encoder2_mu')

        self.Encoder2_sigma = nn.NN_Block_0L(rng=rng,
                                             input_source=zyy_source,
                                             input_target=zyy_target,
                                             struct=encoder2_struct,
                                             name='Encoder2_sigma')

        #Sample layer
        self.EC_2_GSL_source = nn.GaussianSampleLayer(
            mu=self.Encoder2_mu.output_source,
            log_sigma=self.Encoder2_sigma.output_source,
            n_in=encoder2_struct.layer_dim[-1],
            batch_size=batch_size)

        self.EC_2_GSL_target = nn.GaussianSampleLayer(
            mu=self.Encoder2_mu.output_target,
            log_sigma=self.Encoder2_sigma.output_target,
            n_in=encoder2_struct.layer_dim[-1],
            batch_size=batch_size)

        ay_dim = encoder2_struct.layer_dim[-1]
        self.EC_ay_S_mu = self.Encoder2_mu.output_source
        self.EC_ay_S_log_sigma = self.Encoder2_sigma.output_source
        self.EC_ay_S_sigma = T.exp(self.EC_ay_S_log_sigma)
        self.EC_ay_T_mu = self.Encoder2_mu.output_target
        self.EC_ay_T_log_sigma = self.Encoder2_sigma.output_target
        self.EC_ay_T_sigma = T.exp(self.EC_ay_T_log_sigma)

        self.ay_S = self.EC_2_GSL_source.output
        self.ay_T = self.EC_2_GSL_target.output

        self.Encoder2_params = self.Encoder2_mu.params + self.Encoder2_sigma.params
        #self.Encoder2_outputs = [("EC_ay_S_mu", self.EC_ay_S_mu), ("EC_ay_S_log_sigma", self.EC_ay_S_log_sigma), ("ay_S", self.ay_S),
        #                         ("EC_ay_T_mu",self.EC_ay_T_mu), ("EC_ay_T_log_sigma",self.EC_ay_T_log_sigma), ("ay_T", self.ay_T)]
        self.Encoder2_outputs = [
            self.EC_ay_S_mu, self.EC_ay_S_log_sigma, self.ay_S,
            self.EC_ay_T_mu, self.EC_ay_T_log_sigma, self.ay_T
        ]
        self.Encoder2_outputs_name = [
            "EC_ay_S_mu", "EC_ay_S_log_sigma", "ay_S", "EC_ay_T_mu",
            "EC_ay_T_log_sigma", "ay_T"
        ]

        #------------------------------------------------------------------------
        #Decoder 1 Neural Network: present p_\theta(x_n | {z_y}_n, d_n)
        zyd_source = T.concatenate([self.zy_S, d_source], axis=1)
        zyd_target = T.concatenate([self.zy_T, d_target], axis=1)

        self.Decoder1_mu = nn.NN_Block_0L(rng=rng,
                                          input_source=zyd_source,
                                          input_target=zyd_target,
                                          struct=decoder1_struct,
                                          name='Decoder1_mu')

        self.Decoder1_sigma = nn.NN_Block_0L(rng=rng,
                                             input_source=zyd_source,
                                             input_target=zyd_target,
                                             struct=decoder1_struct,
                                             name='Decoder1_sigma')
        '''
        #Sample layer
        self.DC_1_GSL_source = GaussianSampleLayer(
            mu=self.Decoder1_mu.output_source,
            log_sigma=self.Decoder1_sigma.output_source,
            n_in = decoder1_struct.layer_dim[-1],
            batch_size = batch_size
        )
        
        self.DC_1_GSL_target = GaussianSampleLayer(
            mu=self.Decoder1_mu.output_target,
            log_sigma=self.Decoder1_sigma.output_target,
            n_in = decoder1_struct.layer_dim[-1],
            batch_size = batch_size            
        )        
        '''

        x_dim = decoder1_struct.layer_dim[-1]
        self.DC_x_S_mu = self.Decoder1_mu.output_source
        self.DC_x_S_log_sigma = self.Decoder1_sigma.output_source
        self.DC_x_S_sigma = T.exp(self.DC_x_S_log_sigma)
        self.DC_x_T_mu = self.Decoder1_mu.output_target
        self.DC_x_T_log_sigma = self.Decoder1_sigma.output_target
        self.DC_x_T_sigma = T.exp(self.DC_x_T_log_sigma)

        #self.reconstructed_x_S = self.DC_1_GSL_source.output
        #self.reconstructed_x_T = self.DC_1_GSL_target.output

        self.Decoder1_params = self.Decoder1_mu.params + self.Decoder1_sigma.params
        #self.Decoder1_outputs = [("DC_x_S_mu", self.DC_x_S_mu), ("DC_x_S_log_sigma", self.DC_x_S_log_sigma),
        #                         ("DC_x_T_mu", self.DC_x_T_mu), ("DC_x_T_log_sigma", self.DC_x_T_log_sigma)]
        self.Decoder1_outputs = [
            self.DC_x_S_mu, self.DC_x_S_log_sigma, self.DC_x_T_mu,
            self.DC_x_T_log_sigma
        ]
        self.Decoder1_outputs_name = [
            "DC_x_S_mu", "DC_x_S_log_sigma", "DC_x_T_mu", "DC_x_T_log_sigma"
        ]

        #------------------------------------------------------------------------
        #Decoder 2 Neural Network: present p_\theta({z_y}_n | {a_y}_n, y_n)
        ayy_source = T.concatenate([self.ay_S, label_source], axis=1)
        ayy_target = T.concatenate([self.ay_T, label_target], axis=1)

        self.Decoder2_mu = nn.NN_Block_0L(rng=rng,
                                          input_source=ayy_source,
                                          input_target=ayy_target,
                                          struct=decoder2_struct,
                                          name='Decoder2_mu')

        self.Decoder2_sigma = nn.NN_Block_0L(rng=rng,
                                             input_source=ayy_source,
                                             input_target=ayy_target,
                                             struct=decoder2_struct,
                                             name='Decoder2_sigma')

        self.DC_zy_S_mu = self.Decoder2_mu.output_source
        self.DC_zy_S_log_sigma = self.Decoder2_sigma.output_source
        self.DC_zy_S_sigma = T.exp(self.DC_zy_S_log_sigma)
        self.DC_zy_T_mu = self.Decoder2_mu.output_target
        self.DC_zy_T_log_sigma = self.Decoder2_sigma.output_target
        self.DC_zy_T_sigma = T.exp(self.DC_zy_T_log_sigma)

        self.Decoder2_params = self.Decoder2_mu.params + self.Decoder2_sigma.params
        #self.Decoder2_outputs = [("DC_zy_S_mu", self.DC_zy_S_mu), ("DC_zy_S_log_sigma", self.DC_zy_S_log_sigma),
        #                         ("DC_zy_T_mu", self.DC_zy_T_mu), ("DC_zy_T_log_sigma", self.DC_zy_T_log_sigma)]
        self.Decoder2_outputs = [
            self.DC_zy_S_mu, self.DC_zy_S_log_sigma, self.DC_zy_T_mu,
            self.DC_zy_T_log_sigma
        ]
        self.Decoder2_outputs_name = [
            "DC_zy_S_mu", "DC_zy_S_log_sigma", "DC_zy_T_mu",
            "DC_zy_T_log_sigma"
        ]

        #------------------------------------------------------------------------
        # Error Function Set
        # KL(q(zy)||p(zy)) -----------
        self.KL_zy_source = er.KLGaussianGaussian(self.EC_zy_S_mu,
                                                  self.EC_zy_S_log_sigma,
                                                  self.DC_zy_S_mu,
                                                  self.DC_zy_S_log_sigma)
        self.KL_zy_target = er.KLGaussianGaussian(self.EC_zy_T_mu,
                                                  self.EC_zy_T_log_sigma,
                                                  self.DC_zy_T_mu,
                                                  self.DC_zy_T_log_sigma)

        # KL(q(ay)||p(ay)) -----------
        self.KL_ay_source = er.KLGaussianStdGaussian(self.EC_ay_S_mu,
                                                     self.EC_ay_S_log_sigma)
        self.KL_ay_target = er.KLGaussianStdGaussian(self.EC_ay_T_mu,
                                                     self.EC_ay_T_log_sigma)

        threshold = 0.0000001
        # Label log-likelihood under q(y | z_y) for source and target data -----------
        self.LH_y_source = -T.sum(
            -label_source * T.log(T.maximum(self.EC_y_S_pi, threshold)),
            axis=1)
        self.LH_y_target = -T.sum(
            -label_target * T.log(T.maximum(self.EC_y_T_pi, threshold)),
            axis=1)
        #self.LH_y_source = T.nnet.nnet.categorical_crossentropy(self.EC_y_S_pi, label_source)

        # Reconstruction log-likelihood p(x | z_y, d), assuming a Gaussian decoder -----------
        self.LH_x_source = er.LogGaussianPDF(input_source, self.DC_x_S_mu,
                                             self.DC_x_S_log_sigma)
        self.LH_x_target = er.LogGaussianPDF(input_target, self.DC_x_T_mu,
                                             self.DC_x_T_log_sigma)
        #self.LH_x_source = - T.nnet.binary_crossentropy(self.reconstructed_x_S, input_source)
        #self.LH_x_target = - T.nnet.binary_crossentropy(self.reconstructed_x_T, input_target)

        # MMD between source and target z_y using a Gaussian kernel -----------
        #self.MMD = MMD(self.zy_S, self.zy_T, batch_size)
        self.MMD = er.MMDEstimator(rng, self.zy_S, self.zy_T, zy_dim,
                                   batch_size, D)

        #Cost function
        tmp = self.KL_zy_source + self.KL_zy_target + self.KL_ay_source + self.KL_ay_target \
            + self.LH_x_source + self.LH_x_target + self.LH_y_source * alpha + self.LH_y_target * alpha
        self.cost = -tmp.mean() + self.MMD * beta

        # the parameters of the model
        self.params = self.Encoder1_params + self.Encoder2_params + self.Encoder3_params + self.Decoder1_params + self.Decoder2_params

        # all output of VAE
        self.outputs = self.Encoder1_outputs + self.Encoder2_outputs + self.Encoder3_outputs + self.Decoder1_outputs + self.Decoder2_outputs
        self.outputs_name = self.Encoder1_outputs_name + self.Encoder2_outputs_name + self.Encoder3_outputs_name \
            + self.Decoder1_outputs_name + self.Decoder2_outputs_name

        # keep track of model input
        self.input_source = input_source
        self.input_target = input_target

        #Predict Label
        self.y_pred_source = T.argmax(self.EC_y_S_pi, axis=1)
        self.y_pred_target = T.argmax(self.EC_y_T_pi, axis=1)
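
# For reference, a NumPy sketch of the standard closed-form KL terms that the
# er helpers above appear to compute (KLGaussianGaussian, KLGaussianStdGaussian)
# for diagonal Gaussians parameterized by mu and log_sigma. Shown with the usual
# non-negative sign convention; the actual helpers may fold the ELBO's sign in.
import numpy as np

def kl_gaussian_gaussian(mu1, log_sigma1, mu2, log_sigma2):
    """Element-wise KL( N(mu1, sigma1^2) || N(mu2, sigma2^2) )."""
    return (log_sigma2 - log_sigma1
            + (np.exp(2.0 * log_sigma1) + (mu1 - mu2) ** 2) / (2.0 * np.exp(2.0 * log_sigma2))
            - 0.5)

def kl_gaussian_std_gaussian(mu, log_sigma):
    """Element-wise KL( N(mu, sigma^2) || N(0, 1) )."""
    return 0.5 * (np.exp(2.0 * log_sigma) + mu ** 2 - 2.0 * log_sigma - 1.0)
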
Example #3
    def __init__(self,
                 rng,
                 input_source,
                 input_target,
                 label_source,
                 batch_size,
                 struct,
                 coef,
                 train=False,
                 init_params=None):
        """Initialize the parameters for the multilayer perceptron

        :type rng: numpy.random.RandomState
        :param rng: a random number generator used to initialize weights

        :type input_source: theano.tensor.TensorType
        :param input_source: symbolic variable that describes the "Source Domain" input of the architecture (one minibatch)

        :type input_target: theano.tensor.TensorType
        :param input_target: symbolic variable that describes the "Target Domain" input of the architecture (one minibatch)

        :type struct: class NN_struct
        :param struct: defines the structure of each sub-network (encoder and classifier)
        """

        self.struct = struct
        encoder_struct = struct.encoder
        classifier_struct = struct.classifier

        beta = coef.beta
        D = coef.D
        optimize = coef.optimize

        if init_params is None:
            init_params = NN_params()

        #------------------------------------------------------------------------
        #Encoder Neural Network
        self.Encoder = nn.NN_Block(rng=rng,
                                   input_source=input_source,
                                   input_target=input_target,
                                   struct=encoder_struct,
                                   params=init_params.EC_params,
                                   name='Encoder')

        self.z_S = self.Encoder.output_source
        self.z_T = self.Encoder.output_target

        z_dim = encoder_struct.layer_dim[-1]

        self.Encoder_params = self.Encoder.params
        self.Encoder_learning_rate = self.Encoder.learning_rate
        self.Encoder_decay = self.Encoder.decay

        #------------------------------------------------------------------------
        #Classifier Neural Network
        self.Classifier = nn.NN_Block(rng=rng,
                                      input_source=self.z_S,
                                      input_target=self.z_T,
                                      struct=classifier_struct,
                                      params=init_params.CF_params,
                                      name='Classifier')

        self.y_S = self.Classifier.output_source
        self.y_T = self.Classifier.output_target

        self.Classifier_params = self.Classifier.params
        self.Classifier_learning_rate = self.Classifier.learning_rate
        self.Classifier_decay = self.Classifier.decay

        #------------------------------------------------------------------------
        # Error Function Set
        # Classification only source data-----------
        threshold = 0.0000001
        self.Error_source = T.mean(
            T.sum(-label_source * T.log(T.maximum(self.y_S, threshold)),
                  axis=1))

        # MMD between source and target latent codes using a Gaussian kernel -----------
        #self.MMD = MMD(self.zy_S, self.zy_T, batch_size)
        self.MMD = er.MMDEstimator(rng, self.z_S, self.z_T, z_dim, batch_size,
                                   D)

        #Cost function
        self.cost = self.Error_source + self.MMD * beta

        # the parameters of the model
        self.params = self.Encoder_params + self.Classifier_params
        self.learning_rate = self.Encoder_learning_rate + self.Classifier_learning_rate
        self.decay = self.Encoder_decay + self.Classifier_decay

        if optimize == 'Adam_update' and train:
            #Adam update function
            self.updates = nn.adam(loss=self.cost,
                                   all_params=self.params,
                                   all_learning_rate=self.learning_rate)
        elif optimize == 'SGD' and train:
            #Standard update function
            gparams = [T.grad(self.cost, param) for param in self.params]

            self.params_updates = [
                (param, param - learning_rate * gparam)
                for param, gparam, learning_rate in zip(
                    self.params, gparams, self.learning_rate)
            ]

            self.learning_rate_update = [
                (learning_rate, learning_rate * decay)
                for learning_rate, decay in zip(self.learning_rate, self.decay)
            ]

            self.updates = self.params_updates + self.learning_rate_update

        # keep track of model input
        self.input_source = input_source
        self.input_target = input_target

        #Predict Label
        self.y_pred_source = T.argmax(self.y_S, axis=1)
        self.y_pred_target = T.argmax(self.y_T, axis=1)
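
# A self-contained Theano sketch of the per-parameter SGD-with-decay pattern
# used in the 'SGD' branch above: parameters are updated with their own learning
# rates, and the learning rates themselves decay every step. Illustration only;
# the toy variables x, w, lr are not part of the model classes.
import numpy as np
import theano
import theano.tensor as T

x = T.vector('x')
w = theano.shared(np.ones(3, dtype=theano.config.floatX), name='w')
lr = theano.shared(np.asarray(0.1, dtype=theano.config.floatX), name='lr')
decay = np.asarray(0.99, dtype=theano.config.floatX)

cost = T.sum((w * x) ** 2)
gw = T.grad(cost, w)

updates = [(w, w - lr * gw),   # parameter update, as in params_updates
           (lr, lr * decay)]   # learning-rate decay, as in learning_rate_update

step = theano.function([x], cost, updates=updates)
step(np.ones(3, dtype=theano.config.floatX))
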
Example #4
    def __init__(self,
                 rng,
                 input_source,
                 input_target,
                 label_source,
                 batch_size,
                 struct,
                 coef,
                 train=False,
                 init_params=None):
        """Initialize the parameters for the multilayer perceptron

        :type rng: numpy.random.RandomState
        :param rng: a random number generator used to initialize weights

        :type input_source: theano.tensor.TensorType
        :param input_source: symbolic variable that describes the "Source Domain" input of the architecture (one minibatch)

        :type input_target: theano.tensor.TensorType
        :param input_target: symbolic variable that describes the "Target Domain" input of the architecture (one minibatch)

        :type struct: class NN_struct
        :param struct: defines the structure of each sub-network (encoders and decoders)
        """

        if train:
            batch_size[0] = batch_size[0] * coef.L
            batch_size[1] = batch_size[1] * coef.L
            tmp_S = input_source
            tmp_T = input_target
            tmp_l = label_source
            for i in range(coef.L - 1):
                tmp_S = T.concatenate([tmp_S, input_source], axis=0)
                tmp_T = T.concatenate([tmp_T, input_target], axis=0)
                tmp_l = T.concatenate([tmp_l, label_source], axis=0)
            input_source = tmp_S
            input_target = tmp_T
            label_source = tmp_l
            L = coef.L
        else:
            L = 1

        self.L = L

        self.struct = struct
        encoder1_struct = struct.encoder1
        encoder2_struct = struct.encoder2
        encoder3_struct = struct.encoder3
        encoder4_struct = struct.encoder4
        encoder5_struct = struct.encoder5
        encoderX_struct = struct.encoderX
        decoder1_struct = struct.decoder1
        decoder2_struct = struct.decoder2
        decoder3_struct = struct.decoder3

        alpha = coef.alpha
        beta = coef.beta
        chi = coef.chi
        gamma = coef.gamma
        D = coef.D
        optimize = coef.optimize

        if init_params is None:
            init_params = VLDF_params()

        #------------------------------------------------------------------------
        #Encoder 1 Neural Network: present q_\phi({z_y}_n | x_n, d_n)
        zero_v_S = T.zeros([batch_size[0], 1], dtype=theano.config.floatX)
        zero_v_T = T.zeros([batch_size[1], 1], dtype=theano.config.floatX)
        one_v_S = T.ones([batch_size[0], 1], dtype=theano.config.floatX)
        one_v_T = T.ones([batch_size[1], 1], dtype=theano.config.floatX)

        d_source = T.concatenate([zero_v_S, one_v_S], axis=1)
        xd_source = T.concatenate([input_source, d_source], axis=1)
        d_target = T.concatenate([one_v_T, zero_v_T], axis=1)
        xd_target = T.concatenate([input_target, d_target], axis=1)

        self.Encoder1 = nn.Gaussian_MLP(rng=rng,
                                        input_source=xd_source,
                                        input_target=xd_target,
                                        struct=encoder1_struct,
                                        batch_size=batch_size,
                                        params=init_params.EC1_params,
                                        name='Encoder1')

        zy_dim = encoder1_struct.mu.layer_dim[-1]
        self.EC_zy_S_mu = self.Encoder1.S_mu
        self.EC_zy_S_log_sigma = self.Encoder1.S_log_sigma
        self.EC_zy_S_sigma = T.exp(self.EC_zy_S_log_sigma)
        self.EC_zy_T_mu = self.Encoder1.T_mu
        self.EC_zy_T_log_sigma = self.Encoder1.T_log_sigma
        self.EC_zy_T_sigma = T.exp(self.EC_zy_T_log_sigma)

        self.zy_S = self.Encoder1.S_output
        self.zy_T = self.Encoder1.T_output

        self.Encoder1_params = self.Encoder1.params
        self.Encoder1_learning_rate = self.Encoder1.learning_rate
        self.Encoder1_decay = self.Encoder1.decay

        #------------------------------------------------------------------------
        #Encoder 5 Neural Network: present q_\phi(y_n | {z_y}_n)
        self.Encoder5_pi = nn.NN_Block(rng=rng,
                                       input_source=self.zy_S,
                                       input_target=self.zy_T,
                                       struct=encoder5_struct,
                                       params=init_params.EC5_params,
                                       name='Encoder5_pi')

        #Sample layer
        self.EC_5_CSL_target = nn.CatSampleLayer(
            pi=self.Encoder5_pi.output_target,
            n_in=encoder5_struct.layer_dim[-1],
            batch_size=batch_size[1])
        y_dim = encoder5_struct.layer_dim[-1]
        self.EC_y_S_pi = self.Encoder5_pi.output_source
        self.EC_y_T_pi = self.Encoder5_pi.output_target

        self.y_T = self.EC_5_CSL_target.output

        self.Encoder5_params = self.Encoder5_pi.params
        self.Encoder5_learning_rate = self.Encoder5_pi.learning_rate
        self.Encoder5_decay = self.Encoder5_pi.decay

        #------------------------------------------------------------------------
        #Encoder 3 Neural Network: present q_\phi({a_y}_n | {z_y}_n, y_n)
        #Input Append
        zyy_source = T.concatenate([self.zy_S, label_source], axis=1)
        zyy_target = T.concatenate([self.zy_T, self.y_T], axis=1)

        self.Encoder3 = nn.Gaussian_MLP(rng=rng,
                                        input_source=zyy_source,
                                        input_target=zyy_target,
                                        struct=encoder3_struct,
                                        batch_size=batch_size,
                                        params=init_params.EC3_params,
                                        name='Encoder3')

        ay_dim = encoder3_struct.mu.layer_dim[-1]
        self.EC_ay_S_mu = self.Encoder3.S_mu
        self.EC_ay_S_log_sigma = self.Encoder3.S_log_sigma
        self.EC_ay_S_sigma = T.exp(self.EC_ay_S_log_sigma)
        self.EC_ay_T_mu = self.Encoder3.T_mu
        self.EC_ay_T_log_sigma = self.Encoder3.T_log_sigma
        self.EC_ay_T_sigma = T.exp(self.EC_ay_T_log_sigma)

        self.ay_S = self.Encoder3.S_output
        self.ay_T = self.Encoder3.T_output

        self.Encoder3_params = self.Encoder3.params
        self.Encoder3_learning_rate = self.Encoder3.learning_rate
        self.Encoder3_decay = self.Encoder3.decay

        #------------------------------------------------------------------------
        #Encoder 2 Neural Network: present q_\phi({z_d}_n | x_n, d_n)
        self.Encoder2 = nn.Gaussian_MLP(rng=rng,
                                        input_source=xd_source,
                                        input_target=xd_target,
                                        struct=encoder2_struct,
                                        batch_size=batch_size,
                                        params=init_params.EC2_params,
                                        name='Encoder2')

        zd_dim = encoder2_struct.mu.layer_dim[-1]
        self.EC_zd_S_mu = self.Encoder2.S_mu
        self.EC_zd_S_log_sigma = self.Encoder2.S_log_sigma
        self.EC_zd_S_sigma = T.exp(self.EC_zd_S_log_sigma)
        self.EC_zd_T_mu = self.Encoder2.T_mu
        self.EC_zd_T_log_sigma = self.Encoder2.T_log_sigma
        self.EC_zd_T_sigma = T.exp(self.EC_zd_T_log_sigma)

        self.zd_S = self.Encoder2.S_output
        self.zd_T = self.Encoder2.T_output

        self.Encoder2_params = self.Encoder2.params
        self.Encoder2_learning_rate = self.Encoder2.learning_rate
        self.Encoder2_decay = self.Encoder2.decay

        #------------------------------------------------------------------------
        #Encoder X Neural Network: present q_\phi(d_n | {z_d}_n)
        self.EncoderX_pi = nn.NN_Block(rng=rng,
                                       input_source=self.zd_S,
                                       input_target=self.zd_T,
                                       struct=encoderX_struct,
                                       params=init_params.ECX_params,
                                       name='EncoderX_pi')

        self.EC_d_S_pi = self.EncoderX_pi.output_source
        self.EC_d_T_pi = self.EncoderX_pi.output_target

        self.EncoderX_params = self.EncoderX_pi.params
        self.EncoderX_learning_rate = self.EncoderX_pi.learning_rate
        self.EncoderX_decay = self.EncoderX_pi.decay

        #------------------------------------------------------------------------
        #Encoder 4 Neural Network: present q_\phi({a_d}_n | {z_d}_n, d_n)
        #Input Append
        zdd_source = T.concatenate([self.zd_S, d_source], axis=1)
        zdd_target = T.concatenate([self.zd_T, d_target], axis=1)

        self.Encoder4 = nn.Gaussian_MLP(rng=rng,
                                        input_source=zdd_source,
                                        input_target=zdd_target,
                                        struct=encoder4_struct,
                                        batch_size=batch_size,
                                        params=init_params.EC4_params,
                                        name='Encoder4')

        ad_dim = encoder4_struct.mu.layer_dim[-1]
        self.EC_ad_S_mu = self.Encoder4.S_mu
        self.EC_ad_S_log_sigma = self.Encoder4.S_log_sigma
        self.EC_ad_S_sigma = T.exp(self.EC_ad_S_log_sigma)
        self.EC_ad_T_mu = self.Encoder4.T_mu
        self.EC_ad_T_log_sigma = self.Encoder4.T_log_sigma
        self.EC_ad_T_sigma = T.exp(self.EC_ad_T_log_sigma)

        self.ad_S = self.Encoder4.S_output
        self.ad_T = self.Encoder4.T_output

        self.Encoder4_params = self.Encoder4.params
        self.Encoder4_learning_rate = self.Encoder4.learning_rate
        self.Encoder4_decay = self.Encoder4.decay

        #------------------------------------------------------------------------
        #Decoder 1 Neural Network: present p_\theta(x_n | {z_y}_n, {z_d}_n)
        zyzd_source = T.concatenate([self.zy_S, self.zd_S], axis=1)
        zyzd_target = T.concatenate([self.zy_T, self.zd_T], axis=1)

        self.Decoder1 = nn.Gaussian_MLP(rng=rng,
                                        input_source=zyzd_source,
                                        input_target=zyzd_target,
                                        struct=decoder1_struct,
                                        batch_size=batch_size,
                                        params=init_params.DC1_params,
                                        name='Decoder1')

        x_dim = decoder1_struct.mu.layer_dim[-1]
        self.DC_x_S_mu = self.Decoder1.S_mu
        self.DC_x_S_log_sigma = self.Decoder1.S_log_sigma
        self.DC_x_S_sigma = T.exp(self.DC_x_S_log_sigma)
        self.DC_x_T_mu = self.Decoder1.T_mu
        self.DC_x_T_log_sigma = self.Decoder1.T_log_sigma
        self.DC_x_T_sigma = T.exp(self.DC_x_T_log_sigma)

        self.Decoder1_params = self.Decoder1.params
        self.Decoder1_learning_rate = self.Decoder1.learning_rate
        self.Decoder1_decay = self.Decoder1.decay

        #------------------------------------------------------------------------
        #Decoder 2 Neural Network: present p_\theta({z_y}_n | {a_y}_n, y_n)
        ayy_source = T.concatenate([self.ay_S, label_source], axis=1)
        ayy_target = T.concatenate([self.ay_T, self.y_T], axis=1)

        self.Decoder2 = nn.Gaussian_MLP(rng=rng,
                                        input_source=ayy_source,
                                        input_target=ayy_target,
                                        struct=decoder2_struct,
                                        batch_size=batch_size,
                                        params=init_params.DC2_params,
                                        name='Decoder2')

        self.DC_zy_S_mu = self.Decoder2.S_mu
        self.DC_zy_S_log_sigma = self.Decoder2.S_log_sigma
        self.DC_zy_S_sigma = T.exp(self.DC_zy_S_log_sigma)
        self.DC_zy_T_mu = self.Decoder2.T_mu
        self.DC_zy_T_log_sigma = self.Decoder2.T_log_sigma
        self.DC_zy_T_sigma = T.exp(self.DC_zy_T_log_sigma)

        self.Decoder2_params = self.Decoder2.params
        self.Decoder2_learning_rate = self.Decoder2.learning_rate
        self.Decoder2_decay = self.Decoder2.decay

        #------------------------------------------------------------------------
        #Decoder 3 Neural Network: present p_\theta({z_d}_n | {a_d}_n, d_n)
        add_source = T.concatenate([self.ad_S, d_source], axis=1)
        add_target = T.concatenate([self.ad_T, d_target], axis=1)

        self.Decoder3 = nn.Gaussian_MLP(rng=rng,
                                        input_source=add_source,
                                        input_target=add_target,
                                        struct=decoder3_struct,
                                        batch_size=batch_size,
                                        params=init_params.DC3_params,
                                        name='Decoder3')

        self.DC_zd_S_mu = self.Decoder3.S_mu
        self.DC_zd_S_log_sigma = self.Decoder3.S_log_sigma
        self.DC_zd_S_sigma = T.exp(self.DC_zd_S_log_sigma)
        self.DC_zd_T_mu = self.Decoder3.T_mu
        self.DC_zd_T_log_sigma = self.Decoder3.T_log_sigma
        self.DC_zd_T_sigma = T.exp(self.DC_zd_T_log_sigma)

        self.Decoder3_params = self.Decoder3.params
        self.Decoder3_learning_rate = self.Decoder3.learning_rate
        self.Decoder3_decay = self.Decoder3.decay

        #------------------------------------------------------------------------
        # Error Function Set
        # KL(q(zy)||p(zy)) -----------
        self.KL_zy_source = er.KLGaussianGaussian(
            self.EC_zy_S_mu, self.EC_zy_S_log_sigma, self.DC_zy_S_mu,
            self.DC_zy_S_log_sigma).sum()
        self.KL_zy_target = er.KLGaussianGaussian(
            self.EC_zy_T_mu, self.EC_zy_T_log_sigma, self.DC_zy_T_mu,
            self.DC_zy_T_log_sigma).sum()

        # KL(q(zd)||p(zd)) -----------
        self.KL_zd_source = er.KLGaussianGaussian(
            self.EC_zd_S_mu, self.EC_zd_S_log_sigma, self.DC_zd_S_mu,
            self.DC_zd_S_log_sigma).sum()
        self.KL_zd_target = er.KLGaussianGaussian(
            self.EC_zd_T_mu, self.EC_zd_T_log_sigma, self.DC_zd_T_mu,
            self.DC_zd_T_log_sigma).sum()

        # KL(q(ay)||p(ay)) -----------
        self.KL_ay_source = er.KLGaussianStdGaussian(
            self.EC_ay_S_mu, self.EC_ay_S_log_sigma).sum()
        self.KL_ay_target = er.KLGaussianStdGaussian(
            self.EC_ay_T_mu, self.EC_ay_T_log_sigma).sum()

        # KL(q(ad)||p(ad)) -----------
        self.KL_ad_source = er.KLGaussianStdGaussian(
            self.EC_ad_S_mu, self.EC_ad_S_log_sigma).sum()
        self.KL_ad_target = er.KLGaussianStdGaussian(
            self.EC_ad_T_mu, self.EC_ad_T_log_sigma).sum()

        # KL(q(y)||p(y)) only target data-----------
        # prior of y is set to 1/K, K is category number
        threshold = 0.0000001
        pi_0 = T.ones([batch_size[1], y_dim],
                      dtype=theano.config.floatX) / y_dim
        self.KL_y_target = T.sum(
            -self.EC_y_T_pi *
            T.log(T.maximum(self.EC_y_T_pi / pi_0, threshold)),
            axis=1).sum()

        # Label log-likelihood under q(y | z_y), source data only -----------
        self.LH_y_source = -T.sum(
            -label_source * T.log(T.maximum(self.EC_y_S_pi, threshold)),
            axis=1).sum()
        #self.LH_y_source = T.nnet.nnet.categorical_crossentropy(self.EC_y_S_pi, label_source)

        # Reconstruction log-likelihood p(x | z_y, z_d), assuming a Gaussian decoder -----------
        self.LH_x_source = er.LogGaussianPDF(input_source, self.DC_x_S_mu,
                                             self.DC_x_S_log_sigma).sum()
        self.LH_x_target = er.LogGaussianPDF(input_target, self.DC_x_T_mu,
                                             self.DC_x_T_log_sigma).sum()

        # Domain log-likelihood under q(d | z_d) for source and target data -----------
        self.LH_d_source = -T.sum(
            -d_source * T.log(T.maximum(self.EC_d_S_pi, threshold)),
            axis=1).sum()
        self.LH_d_target = -T.sum(
            -d_target * T.log(T.maximum(self.EC_d_T_pi, threshold)),
            axis=1).sum()

        # MMD between source and target z_y using a Gaussian kernel -----------
        #self.MMD = MMD(self.zy_S, self.zy_T, batch_size)
        self.MMD = er.MMDEstimator(rng, self.zy_S, self.zy_T, zy_dim,
                                   batch_size, D).sum()

        #self.zy = T.concatenate([self.zy_S, self.zy_T], axis=0)
        #self.zd = T.concatenate([self.zd_S, self.zd_T], axis=0)
        #self.MMD = er.MMDEstimator(rng, self.zy, self.zd, zy_dim, batch_size*2, D)

        #Cost function
        tmp = self.KL_zy_source + self.KL_zy_target + self.KL_ay_source + self.KL_ay_target \
            + self.KL_zd_source + self.KL_zd_target + self.KL_ad_source + self.KL_ad_target \
            + self.LH_x_source*chi + self.LH_x_target*chi+ self.KL_y_target + self.LH_y_source * alpha \
            + self.LH_d_source * gamma + self.LH_d_target * gamma
        self.cost = -tmp / (batch_size[0] + batch_size[1]) + self.MMD * beta

        # the parameters of the model
        self.params = self.Encoder1_params + self.Encoder2_params + self.Encoder3_params \
                    + self.Encoder4_params + self.Encoder5_params + self.EncoderX_params\
                    + self.Decoder1_params + self.Decoder2_params + self.Decoder3_params

        self.learning_rate = self.Encoder1_learning_rate + self.Encoder2_learning_rate + self.Encoder3_learning_rate \
                    + self.Encoder4_learning_rate + self.Encoder5_learning_rate+ self.EncoderX_learning_rate \
                    + self.Decoder1_learning_rate + self.Decoder2_learning_rate + self.Decoder3_learning_rate

        self.decay = self.Encoder1_decay + self.Encoder2_decay + self.Encoder3_decay \
                    + self.Encoder4_decay + self.Encoder5_decay + self.EncoderX_decay \
                    + self.Decoder1_decay + self.Decoder2_decay + self.Decoder3_decay

        if optimize == 'Adam_update':
            #Adam update function
            self.updates = nn.adam(loss=self.cost,
                                   all_params=self.params,
                                   all_learning_rate=self.learning_rate)
        elif optimize == 'SGD':
            #Standard update function
            gparams = [T.grad(self.cost, param) for param in self.params]

            self.params_updates = [
                (param, param - learning_rate * gparam)
                for param, gparam, learning_rate in zip(
                    self.params, gparams, self.learning_rate)
            ]

            self.learning_rate_update = [
                (learning_rate, learning_rate * decay)
                for learning_rate, decay in zip(self.learning_rate, self.decay)
            ]

            self.updates = self.params_updates + self.learning_rate_update

        # keep track of model input
        self.input_source = input_source
        self.input_target = input_target

        #Predict Label
        self.y_pred_source = T.argmax(self.EC_y_S_pi, axis=1)
        self.y_pred_target = T.argmax(self.EC_y_T_pi, axis=1)
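
# For completeness, a NumPy sketch of the diagonal Gaussian log-density that
# er.LogGaussianPDF appears to evaluate for the reconstruction terms above,
# given mu and log_sigma. This is an illustration under that assumption, not
# the er module's actual implementation.
import numpy as np

def log_gaussian_pdf(x, mu, log_sigma):
    """Element-wise log N(x; mu, sigma^2) for a diagonal Gaussian."""
    return (-0.5 * np.log(2.0 * np.pi)
            - log_sigma
            - (x - mu) ** 2 / (2.0 * np.exp(2.0 * log_sigma)))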