def __init__(self, rng, input_source, input_target, label_source, batch_size,
             struct, coef, train=False, init_params=None):
    """Build the VFAE domain-adaptation graph (Theano symbolic).

    Wires three encoders and two decoders into a variational cost:
    KL terms + Gaussian reconstruction likelihood (weighted by chi)
    + source-label likelihood (weighted by alpha) + MMD penalty
    (weighted by beta) between source/target latent codes.

    :type rng: numpy.random.RandomState
    :param rng: random number generator used to initialize weights
    :type input_source: theano.tensor.TensorType
    :param input_source: symbolic "Source Domain" minibatch
    :type input_target: theano.tensor.TensorType
    :param input_target: symbolic "Target Domain" minibatch
    :param label_source: symbolic one-hot labels of the source minibatch
    :param batch_size: two-element sequence [source batch, target batch]
    :type struct: class NN_struct
    :param struct: defines the structure of each sub-network
    :param coef: hyper-parameter container (L, alpha, beta, chi, D, optimize)
    :param train: if True, replicate each minibatch L times and build updates
    :param init_params: optional pre-trained parameters (VFAE_params)
    """
    if train:
        # Replicate every minibatch L times along axis 0 so each datum is
        # paired with L independent reparameterization samples.
        # NOTE(review): the previous version scaled the caller's batch_size
        # list *in place*; rebinding a fresh list avoids that side effect
        # while keeping the values used below identical.
        batch_size = [batch_size[0] * coef.L,   # source batch * sample num
                      batch_size[1] * coef.L]   # target batch * sample num
        tmp_S = input_source
        tmp_T = input_target
        tmp_l = label_source
        # L - 1 extra copies after the original (0-indexed start).
        for _ in range(coef.L - 1):
            tmp_S = T.concatenate([tmp_S, input_source], axis=0)
            tmp_T = T.concatenate([tmp_T, input_target], axis=0)
            tmp_l = T.concatenate([tmp_l, label_source], axis=0)
        input_source = tmp_S
        input_target = tmp_T
        label_source = tmp_l
        L = coef.L  # L = sample number
    else:
        # Testing: a single sample per datum.
        L = 1
    self.L = L
    self.struct = struct

    # Encoding sub-network structures ...
    encoder1_struct = struct.encoder1
    encoder2_struct = struct.encoder2
    encoder3_struct = struct.encoder3
    # ... bottleneck ... and decoding sub-network structures.
    decoder1_struct = struct.decoder1
    decoder2_struct = struct.decoder2

    alpha = coef.alpha        # classification weight
    beta = coef.beta          # MMD weight
    chi = coef.chi            # reconstruction weight
    D = coef.D                # number of random features for fast MMD
    optimize = coef.optimize  # 'Adam_update' or 'SGD'

    # Fall back to freshly initialized parameters when none are supplied.
    if init_params is None:
        init_params = VFAE_params()

    #------------------------------------------------------------------------
    # Encoder 1: q_\phi({z_y}_n | x_n, d_n)
    # The domain indicator d is a one-hot pair appended to the input:
    # source -> [0, 1], target -> [1, 0].
    zero_v_S = T.zeros([batch_size[0], 1], dtype=theano.config.floatX)
    zero_v_T = T.zeros([batch_size[1], 1], dtype=theano.config.floatX)
    one_v_S = T.ones([batch_size[0], 1], dtype=theano.config.floatX)
    one_v_T = T.ones([batch_size[1], 1], dtype=theano.config.floatX)

    d_source = T.concatenate([zero_v_S, one_v_S], axis=1)
    xd_source = T.concatenate([input_source, d_source], axis=1)
    d_target = T.concatenate([one_v_T, zero_v_T], axis=1)
    xd_target = T.concatenate([input_target, d_target], axis=1)

    self.Encoder1 = nn.Gaussian_MLP(rng=rng,
                                    input_source=xd_source,
                                    input_target=xd_target,
                                    struct=encoder1_struct,
                                    batch_size=batch_size,
                                    params=init_params.EC1_params,
                                    name='Encoder1')

    zy_dim = encoder1_struct.mu.layer_dim[-1]
    self.EC_zy_S_mu = self.Encoder1.S_mu
    self.EC_zy_S_log_sigma = self.Encoder1.S_log_sigma
    self.EC_zy_S_sigma = T.exp(self.EC_zy_S_log_sigma)
    self.EC_zy_T_mu = self.Encoder1.T_mu
    self.EC_zy_T_log_sigma = self.Encoder1.T_log_sigma
    self.EC_zy_T_sigma = T.exp(self.EC_zy_T_log_sigma)
    # Reparameterized samples of z_y for both domains.
    self.zy_S = self.Encoder1.S_output
    self.zy_T = self.Encoder1.T_output

    self.Encoder1_params = self.Encoder1.params
    self.Encoder1_learning_rate = self.Encoder1.learning_rate
    self.Encoder1_decay = self.Encoder1.decay

    #------------------------------------------------------------------------
    # Encoder 3: q_\phi(y_n | {z_y}_n) -- label posterior from z_y.
    self.Encoder3_pi = nn.NN_Block(rng=rng,
                                   input_source=self.zy_S,
                                   input_target=self.zy_T,
                                   struct=encoder3_struct,
                                   params=init_params.EC3_params,
                                   name='Encoder3_pi')
    # Sample layer: target labels are unobserved, so sample them.
    self.EC_3_CSL_target = nn.CatSampleLayer(
        pi=self.Encoder3_pi.output_target,
        n_in=encoder3_struct.layer_dim[-1],
        batch_size=batch_size[1])

    y_dim = encoder3_struct.layer_dim[-1]
    self.EC_y_S_pi = self.Encoder3_pi.output_source
    self.EC_y_T_pi = self.Encoder3_pi.output_target
    self.y_T = self.EC_3_CSL_target.output

    self.Encoder3_params = self.Encoder3_pi.params
    self.Encoder3_learning_rate = self.Encoder3_pi.learning_rate
    self.Encoder3_decay = self.Encoder3_pi.decay

    #------------------------------------------------------------------------
    # Encoder 2: q_\phi({a_y}_n | {z_y}_n, y_n)
    # Source uses the true labels; target uses the sampled y_T.
    zyy_source = T.concatenate([self.zy_S, label_source], axis=1)
    zyy_target = T.concatenate([self.zy_T, self.y_T], axis=1)

    self.Encoder2 = nn.Gaussian_MLP(rng=rng,
                                    input_source=zyy_source,
                                    input_target=zyy_target,
                                    struct=encoder2_struct,
                                    batch_size=batch_size,
                                    params=init_params.EC2_params,
                                    name='Encoder2')

    ay_dim = encoder2_struct.mu.layer_dim[-1]
    self.EC_ay_S_mu = self.Encoder2.S_mu
    self.EC_ay_S_log_sigma = self.Encoder2.S_log_sigma
    self.EC_ay_S_sigma = T.exp(self.EC_ay_S_log_sigma)
    self.EC_ay_T_mu = self.Encoder2.T_mu
    self.EC_ay_T_log_sigma = self.Encoder2.T_log_sigma
    self.EC_ay_T_sigma = T.exp(self.EC_ay_T_log_sigma)
    self.ay_S = self.Encoder2.S_output
    self.ay_T = self.Encoder2.T_output

    self.Encoder2_params = self.Encoder2.params
    self.Encoder2_learning_rate = self.Encoder2.learning_rate
    self.Encoder2_decay = self.Encoder2.decay

    #------------------------------------------------------------------------
    # Decoder 1: p_\theta(x_n | {z_y}_n, d_n) -- reconstruct the input.
    zyd_source = T.concatenate([self.zy_S, d_source], axis=1)
    zyd_target = T.concatenate([self.zy_T, d_target], axis=1)

    self.Decoder1 = nn.Gaussian_MLP(rng=rng,
                                    input_source=zyd_source,
                                    input_target=zyd_target,
                                    struct=decoder1_struct,
                                    batch_size=batch_size,
                                    params=init_params.DC1_params,
                                    name='Decoder1')

    x_dim = decoder1_struct.mu.layer_dim[-1]
    self.DC_x_S_mu = self.Decoder1.S_mu
    self.DC_x_S_log_sigma = self.Decoder1.S_log_sigma
    self.DC_x_S_sigma = T.exp(self.DC_x_S_log_sigma)
    self.DC_x_T_mu = self.Decoder1.T_mu
    self.DC_x_T_log_sigma = self.Decoder1.T_log_sigma
    self.DC_x_T_sigma = T.exp(self.DC_x_T_log_sigma)

    self.Decoder1_params = self.Decoder1.params
    self.Decoder1_learning_rate = self.Decoder1.learning_rate
    self.Decoder1_decay = self.Decoder1.decay

    #------------------------------------------------------------------------
    # Decoder 2: p_\theta({z_y}_n | {a_y}_n, y_n) -- prior network for z_y.
    ayy_source = T.concatenate([self.ay_S, label_source], axis=1)
    ayy_target = T.concatenate([self.ay_T, self.y_T], axis=1)

    self.Decoder2 = nn.Gaussian_MLP(rng=rng,
                                    input_source=ayy_source,
                                    input_target=ayy_target,
                                    struct=decoder2_struct,
                                    batch_size=batch_size,
                                    params=init_params.DC2_params,
                                    name='Decoder2')

    self.DC_zy_S_mu = self.Decoder2.S_mu
    self.DC_zy_S_log_sigma = self.Decoder2.S_log_sigma
    self.DC_zy_S_sigma = T.exp(self.DC_zy_S_log_sigma)
    self.DC_zy_T_mu = self.Decoder2.T_mu
    self.DC_zy_T_log_sigma = self.Decoder2.T_log_sigma
    self.DC_zy_T_sigma = T.exp(self.DC_zy_T_log_sigma)

    self.Decoder2_params = self.Decoder2.params
    self.Decoder2_learning_rate = self.Decoder2.learning_rate
    self.Decoder2_decay = self.Decoder2.decay

    #------------------------------------------------------------------------
    # Error Function Set
    # KL(q(zy)||p(zy)) -----------
    self.KL_zy_source = er.KLGaussianGaussian(
        self.EC_zy_S_mu, self.EC_zy_S_log_sigma,
        self.DC_zy_S_mu, self.DC_zy_S_log_sigma).sum()
    self.KL_zy_target = er.KLGaussianGaussian(
        self.EC_zy_T_mu, self.EC_zy_T_log_sigma,
        self.DC_zy_T_mu, self.DC_zy_T_log_sigma).sum()

    # KL(q(ay)||p(ay)) against a standard normal prior -----------
    self.KL_ay_source = er.KLGaussianStdGaussian(
        self.EC_ay_S_mu, self.EC_ay_S_log_sigma).sum()
    self.KL_ay_target = er.KLGaussianStdGaussian(
        self.EC_ay_T_mu, self.EC_ay_T_log_sigma).sum()

    # KL(q(y)||p(y)), target data only -----------
    # The prior of y is uniform 1/K, K = category number; threshold
    # guards the log against zero probabilities.
    threshold = 0.0000001
    pi_0 = T.ones([batch_size[1], y_dim], dtype=theano.config.floatX) / y_dim
    self.KL_y_target = T.sum(
        -self.EC_y_T_pi * T.log(T.maximum(self.EC_y_T_pi / pi_0, threshold)),
        axis=1).sum()

    # Likelihood q(y), source data only (cross-entropy on true labels) -----
    self.LH_y_source = -T.sum(
        -label_source * T.log(T.maximum(self.EC_y_S_pi, threshold)),
        axis=1).sum()

    # Likelihood p(x), Gaussian observation model -----------
    self.LH_x_source = er.LogGaussianPDF(input_source, self.DC_x_S_mu,
                                         self.DC_x_S_log_sigma).sum()
    self.LH_x_target = er.LogGaussianPDF(input_target, self.DC_x_T_mu,
                                         self.DC_x_T_log_sigma).sum()

    # MMD between source and target z_y using a Gaussian kernel -----------
    #self.MMD = MMD(self.zy_S, self.zy_T, batch_size)
    self.MMD = er.MMDEstimator(rng, self.zy_S, self.zy_T, zy_dim,
                               batch_size, D)

    # Cost: negative ELBO averaged over the joint batch, plus MMD penalty.
    tmp = self.KL_zy_source + self.KL_zy_target + self.KL_ay_source + self.KL_ay_target \
        + self.LH_x_source*chi + self.LH_x_target*chi + self.KL_y_target + self.LH_y_source * alpha
    self.cost = -tmp / (batch_size[0] + batch_size[1]) + self.MMD * beta

    # The parameters of the model, with per-parameter learning rate/decay.
    self.params = self.Encoder1_params + self.Encoder2_params + self.Encoder3_params + self.Decoder1_params + self.Decoder2_params
    self.learning_rate = self.Encoder1_learning_rate + self.Encoder2_learning_rate + self.Encoder3_learning_rate \
        + self.Decoder1_learning_rate + self.Decoder2_learning_rate
    self.decay = self.Encoder1_decay + self.Encoder2_decay + self.Encoder3_decay + self.Decoder1_decay + self.Decoder2_decay

    if optimize == 'Adam_update' and train:
        # Adam update function
        self.updates = nn.adam(loss=self.cost,
                               all_params=self.params,
                               all_learning_rate=self.learning_rate)
    elif optimize == 'SGD' and train:
        # Standard SGD with per-parameter learning rate and decay.
        gparams = [T.grad(self.cost, param) for param in self.params]
        self.params_updates = [
            (param, param - learning_rate * gparam)
            for param, gparam, learning_rate in zip(
                self.params, gparams, self.learning_rate)
        ]
        self.learning_rate_update = [
            (learning_rate, learning_rate * decay)
            for learning_rate, decay in zip(self.learning_rate, self.decay)
        ]
        self.updates = self.params_updates + self.learning_rate_update

    # Keep track of model input (possibly the L-fold replicated tensors).
    self.input_source = input_source
    self.input_target = input_target

    # Predicted labels: argmax of the categorical posteriors.
    self.y_pred_source = T.argmax(self.EC_y_S_pi, axis=1)
    self.y_pred_target = T.argmax(self.EC_y_T_pi, axis=1)
def __init__(self, rng, input_source, input_target, label_source, label_target,
             batch_size, encoder1_struct, encoder2_struct, encoder3_struct,
             decoder1_struct, decoder2_struct, alpha, beta, D):
    """Build a fully supervised VFAE graph (labels known in both domains).

    Unlike the semi-supervised variant, `label_target` is given, so no
    categorical sampling of target labels is needed for encoder/decoder 2.
    Also records every intermediate symbolic output (and its name) in
    `self.outputs` / `self.outputs_name` for inspection.

    :type rng: numpy.random.RandomState
    :param rng: a random number generator used to initialize weights
    :type input_source: theano.tensor.TensorType
    :param input_source: symbolic "Source Domain" minibatch
    :type input_target: theano.tensor.TensorType
    :param input_target: symbolic "Target Domain" minibatch
    :param label_source: symbolic one-hot source labels
    :param label_target: symbolic one-hot target labels
    :param batch_size: scalar batch size (same for both domains)
    :type xxx_struct: class NN_struct
    :param xxx_struct: define the structure of each NN
    :param alpha: label-likelihood weight; beta: MMD weight;
        D: number of random features for the fast MMD estimator
    """
    #------------------------------------------------------------------------
    # Encoder 1 Neural Network: present q_\phi({z_y}_n | x_n, d_n)
    # Domain indicator is a single column: source -> 0, target -> 1.
    d_source = T.zeros([batch_size, 1], dtype=theano.config.floatX)
    xd_source = T.concatenate([input_source, d_source], axis=1)
    d_target = T.ones([batch_size, 1], dtype=theano.config.floatX)
    xd_target = T.concatenate([input_target, d_target], axis=1)

    # Separate mu and log-sigma heads sharing the same input.
    self.Encoder1_mu = nn.NN_Block_0L(rng=rng,
                                      input_source=xd_source,
                                      input_target=xd_target,
                                      struct=encoder1_struct,
                                      name='Encoder1_mu')
    self.Encoder1_sigma = nn.NN_Block_0L(rng=rng,
                                         input_source=xd_source,
                                         input_target=xd_target,
                                         struct=encoder1_struct,
                                         name='Encoder1_sigma')
    # Sample layer (reparameterized Gaussian draws).
    self.EC_1_GSL_source = nn.GaussianSampleLayer(
        mu=self.Encoder1_mu.output_source,
        log_sigma=self.Encoder1_sigma.output_source,
        n_in=encoder1_struct.layer_dim[-1],
        batch_size=batch_size)
    self.EC_1_GSL_target = nn.GaussianSampleLayer(
        mu=self.Encoder1_mu.output_target,
        log_sigma=self.Encoder1_sigma.output_target,
        n_in=encoder1_struct.layer_dim[-1],
        batch_size=batch_size)

    zy_dim = encoder1_struct.layer_dim[-1]
    self.EC_zy_S_mu = self.Encoder1_mu.output_source
    self.EC_zy_S_log_sigma = self.Encoder1_sigma.output_source
    self.EC_zy_S_sigma = T.exp(self.EC_zy_S_log_sigma)
    self.EC_zy_T_mu = self.Encoder1_mu.output_target
    self.EC_zy_T_log_sigma = self.Encoder1_sigma.output_target
    self.EC_zy_T_sigma = T.exp(self.EC_zy_T_log_sigma)
    self.zy_S = self.EC_1_GSL_source.output
    self.zy_T = self.EC_1_GSL_target.output

    self.Encoder1_params = self.Encoder1_mu.params + self.Encoder1_sigma.params
    #self.Encoder1_outputs = [("EC_zy_S_mu", self.EC_zy_S_mu), ("EC_zy_S_log_sigma", self.EC_zy_S_log_sigma), ("zy_S", self.zy_S),
    #                         ("EC_zy_T_mu", self.EC_zy_T_mu), ("EC_zy_T_log_sigma", self.EC_zy_T_log_sigma), ("zy_T", self.zy_T)]
    self.Encoder1_outputs = [
        self.EC_zy_S_mu, self.EC_zy_S_log_sigma, self.zy_S, self.EC_zy_T_mu,
        self.EC_zy_T_log_sigma, self.zy_T
    ]
    self.Encoder1_outputs_name = [
        "EC_zy_S_mu", "EC_zy_S_log_sigma", "zy_S", "EC_zy_T_mu",
        "EC_zy_T_log_sigma", "zy_T"
    ]

    #------------------------------------------------------------------------
    # Encoder 3 Neural Network: present q_\phi(y_n | {z_1}_n)
    self.Encoder3_pi = nn.NN_Block_0L(rng=rng,
                                      input_source=self.zy_S,
                                      input_target=self.zy_T,
                                      struct=encoder3_struct,
                                      name='Encoder3_pi')
    # Sample layer (categorical draw from the target label posterior).
    self.EC_3_CSL_target = nn.CatSampleLayer(
        pi=self.Encoder3_pi.output_target,
        n_in=encoder3_struct.layer_dim[-1],
        batch_size=batch_size)

    y_dim = encoder3_struct.layer_dim[-1]
    self.EC_y_S_pi = self.Encoder3_pi.output_source
    self.EC_y_T_pi = self.Encoder3_pi.output_target

    self.Encoder3_params = self.Encoder3_pi.params
    #self.Encoder3_outputs = [("EC_y_S_pi",self.EC_y_S_pi), ("EC_y_T_pi",self.EC_y_T_pi), ("y_T",self.y_T)]
    self.Encoder3_outputs = [self.EC_y_S_pi, self.EC_y_T_pi]
    self.Encoder3_outputs_name = ["EC_y_S_pi", "EC_y_T_pi"]

    #------------------------------------------------------------------------
    # Encoder 2 Neural Network: present q_\phi({a_y}_n | {z_y}_n, y_n)
    # Input Append: both domains use their *given* labels here.
    zyy_source = T.concatenate([self.zy_S, label_source], axis=1)
    zyy_target = T.concatenate([self.zy_T, label_target], axis=1)

    self.Encoder2_mu = nn.NN_Block_0L(rng=rng,
                                      input_source=zyy_source,
                                      input_target=zyy_target,
                                      struct=encoder2_struct,
                                      name='Encoder2_mu')
    self.Encoder2_sigma = nn.NN_Block_0L(rng=rng,
                                         input_source=zyy_source,
                                         input_target=zyy_target,
                                         struct=encoder2_struct,
                                         name='Encoder2_sigma')
    # Sample layer
    self.EC_2_GSL_source = nn.GaussianSampleLayer(
        mu=self.Encoder2_mu.output_source,
        log_sigma=self.Encoder2_sigma.output_source,
        n_in=encoder2_struct.layer_dim[-1],
        batch_size=batch_size)
    self.EC_2_GSL_target = nn.GaussianSampleLayer(
        mu=self.Encoder2_mu.output_target,
        log_sigma=self.Encoder2_sigma.output_target,
        n_in=encoder2_struct.layer_dim[-1],
        batch_size=batch_size)

    ay_dim = encoder2_struct.layer_dim[-1]
    self.EC_ay_S_mu = self.Encoder2_mu.output_source
    self.EC_ay_S_log_sigma = self.Encoder2_sigma.output_source
    self.EC_ay_S_sigma = T.exp(self.EC_ay_S_log_sigma)
    self.EC_ay_T_mu = self.Encoder2_mu.output_target
    self.EC_ay_T_log_sigma = self.Encoder2_sigma.output_target
    self.EC_ay_T_sigma = T.exp(self.EC_ay_T_log_sigma)
    self.ay_S = self.EC_2_GSL_source.output
    self.ay_T = self.EC_2_GSL_target.output

    self.Encoder2_params = self.Encoder2_mu.params + self.Encoder2_sigma.params
    #self.Encoder2_outputs = [("EC_ay_S_mu", self.EC_ay_S_mu), ("EC_ay_S_log_sigma", self.EC_ay_S_log_sigma), ("ay_S", self.ay_S),
    #                         ("EC_ay_T_mu",self.EC_ay_T_mu), ("EC_ay_T_log_sigma",self.EC_ay_T_log_sigma), ("ay_T", self.ay_T)]
    self.Encoder2_outputs = [
        self.EC_ay_S_mu, self.EC_ay_S_log_sigma, self.ay_S, self.EC_ay_T_mu,
        self.EC_ay_T_log_sigma, self.ay_T
    ]
    self.Encoder2_outputs_name = [
        "EC_ay_S_mu", "EC_ay_S_log_sigma", "ay_S", "EC_ay_T_mu",
        "EC_ay_T_log_sigma", "ay_T"
    ]

    #------------------------------------------------------------------------
    # Decoder 1 Neural Network: present p_\theta(x_n | {z_1}_n, s_n)
    zyd_source = T.concatenate([self.zy_S, d_source], axis=1)
    zyd_target = T.concatenate([self.zy_T, d_target], axis=1)

    self.Decoder1_mu = nn.NN_Block_0L(rng=rng,
                                      input_source=zyd_source,
                                      input_target=zyd_target,
                                      struct=decoder1_struct,
                                      name='Decoder1_mu')
    self.Decoder1_sigma = nn.NN_Block_0L(rng=rng,
                                         input_source=zyd_source,
                                         input_target=zyd_target,
                                         struct=decoder1_struct,
                                         name='Decoder1_sigma')
    '''
    #Sample layer
    self.DC_1_GSL_source = GaussianSampleLayer( mu=self.Decoder1_mu.output_source, log_sigma=self.Decoder1_sigma.output_source, n_in = decoder1_struct.layer_dim[-1], batch_size = batch_size )
    self.DC_1_GSL_target = GaussianSampleLayer( mu=self.Decoder1_mu.output_target, log_sigma=self.Decoder1_sigma.output_target, n_in = decoder1_struct.layer_dim[-1], batch_size = batch_size )
    '''
    x_dim = decoder1_struct.layer_dim[-1]
    self.DC_x_S_mu = self.Decoder1_mu.output_source
    self.DC_x_S_log_sigma = self.Decoder1_sigma.output_source
    self.DC_x_S_sigma = T.exp(self.DC_x_S_log_sigma)
    self.DC_x_T_mu = self.Decoder1_mu.output_target
    self.DC_x_T_log_sigma = self.Decoder1_sigma.output_target
    self.DC_x_T_sigma = T.exp(self.DC_x_T_log_sigma)

    #self.reconstructed_x_S = self.DC_1_GSL_source.output
    #self.reconstructed_x_T = self.DC_1_GSL_target.output
    self.Decoder1_params = self.Decoder1_mu.params + self.Decoder1_sigma.params
    #self.Decoder1_outputs = [("DC_x_S_mu", self.DC_x_S_mu), ("DC_x_S_log_sigma", self.DC_x_S_log_sigma),
    #                         ("DC_x_T_mu", self.DC_x_T_mu), ("DC_x_T_log_sigma", self.DC_x_T_log_sigma)]
    self.Decoder1_outputs = [
        self.DC_x_S_mu, self.DC_x_S_log_sigma, self.DC_x_T_mu,
        self.DC_x_T_log_sigma
    ]
    self.Decoder1_outputs_name = [
        "DC_x_S_mu", "DC_x_S_log_sigma", "DC_x_T_mu", "DC_x_T_log_sigma"
    ]

    #------------------------------------------------------------------------
    # Decoder 2 Neural Network: present p_\theta({z_y}_n | {a_y}_n, y_n)
    ayy_source = T.concatenate([self.ay_S, label_source], axis=1)
    ayy_target = T.concatenate([self.ay_T, label_target], axis=1)

    self.Decoder2_mu = nn.NN_Block_0L(rng=rng,
                                      input_source=ayy_source,
                                      input_target=ayy_target,
                                      struct=decoder2_struct,
                                      name='Decoder2_mu')
    self.Decoder2_sigma = nn.NN_Block_0L(rng=rng,
                                         input_source=ayy_source,
                                         input_target=ayy_target,
                                         struct=decoder2_struct,
                                         name='Decoder2_sigma')

    self.DC_zy_S_mu = self.Decoder2_mu.output_source
    self.DC_zy_S_log_sigma = self.Decoder2_sigma.output_source
    self.DC_zy_S_sigma = T.exp(self.DC_zy_S_log_sigma)
    self.DC_zy_T_mu = self.Decoder2_mu.output_target
    self.DC_zy_T_log_sigma = self.Decoder2_sigma.output_target
    self.DC_zy_T_sigma = T.exp(self.DC_zy_T_log_sigma)

    self.Decoder2_params = self.Decoder2_mu.params + self.Decoder2_sigma.params
    #self.Decoder2_outputs = [("DC_zy_S_mu", self.DC_zy_S_mu), ("DC_zy_S_log_sigma", self.DC_zy_S_log_sigma),
    #                         ("DC_zy_T_mu", self.DC_zy_T_mu), ("DC_zy_T_log_sigma", self.DC_zy_T_log_sigma)]
    self.Decoder2_outputs = [
        self.DC_zy_S_mu, self.DC_zy_S_log_sigma, self.DC_zy_T_mu,
        self.DC_zy_T_log_sigma
    ]
    self.Decoder2_outputs_name = [
        "DC_zy_S_mu", "DC_zy_S_log_sigma", "DC_zy_T_mu", "DC_zy_T_log_sigma"
    ]

    #------------------------------------------------------------------------
    # Error Function Set
    # KL(q(zy)||p(zy)) ----------- (kept per-example; summed via mean later)
    self.KL_zy_source = er.KLGaussianGaussian(self.EC_zy_S_mu,
                                              self.EC_zy_S_log_sigma,
                                              self.DC_zy_S_mu,
                                              self.DC_zy_S_log_sigma)
    self.KL_zy_target = er.KLGaussianGaussian(self.EC_zy_T_mu,
                                              self.EC_zy_T_log_sigma,
                                              self.DC_zy_T_mu,
                                              self.DC_zy_T_log_sigma)

    # KL(q(ay)||p(ay)) against a standard normal prior -----------
    self.KL_ay_source = er.KLGaussianStdGaussian(self.EC_ay_S_mu,
                                                 self.EC_ay_S_log_sigma)
    self.KL_ay_target = er.KLGaussianStdGaussian(self.EC_ay_T_mu,
                                                 self.EC_ay_T_log_sigma)

    # threshold guards T.log against zero probabilities.
    threshold = 0.0000001
    # Likelihood q(y): cross-entropy on the given labels, both domains ------
    self.LH_y_source = -T.sum(
        -label_source * T.log(T.maximum(self.EC_y_S_pi, threshold)), axis=1)
    self.LH_y_target = -T.sum(
        -label_target * T.log(T.maximum(self.EC_y_T_pi, threshold)), axis=1)
    #self.LH_y_source = T.nnet.nnet.categorical_crossentropy(self.EC_y_S_pi, label_source)

    # Likelihood p(x) ----------- if gaussian
    self.LH_x_source = er.LogGaussianPDF(input_source, self.DC_x_S_mu,
                                         self.DC_x_S_log_sigma)
    self.LH_x_target = er.LogGaussianPDF(input_target, self.DC_x_T_mu,
                                         self.DC_x_T_log_sigma)
    #self.LH_x_source = - T.nnet.binary_crossentropy(self.reconstructed_x_S, input_source)
    #self.LH_x_target = - T.nnet.binary_crossentropy(self.reconstructed_x_T, input_target)

    # MMD between s, x using gaussian kernel -----------
    #self.MMD = MMD(self.zy_S, self.zy_T, batch_size)
    self.MMD = er.MMDEstimator(rng, self.zy_S, self.zy_T, zy_dim, batch_size,
                               D)

    # Cost function: negative mean ELBO plus weighted MMD penalty.
    tmp = self.KL_zy_source + self.KL_zy_target + self.KL_ay_source + self.KL_ay_target \
        + self.LH_x_source + self.LH_x_target + self.LH_y_source * alpha + self.LH_y_target * alpha
    self.cost = -tmp.mean() + self.MMD * beta

    # the parameters of the model
    self.params = self.Encoder1_params + self.Encoder2_params + self.Encoder3_params + self.Decoder1_params + self.Decoder2_params

    # all output of VAE (parallel lists of tensors and their names)
    self.outputs = self.Encoder1_outputs + self.Encoder2_outputs + self.Encoder3_outputs + self.Decoder1_outputs + self.Decoder2_outputs
    self.outputs_name = self.Encoder1_outputs_name + self.Encoder2_outputs_name + self.Encoder3_outputs_name \
        + self.Decoder1_outputs_name + self.Decoder2_outputs_name

    # keep track of model input
    self.input_source = input_source
    self.input_target = input_target

    # Predict Label
    self.y_pred_source = T.argmax(self.EC_y_S_pi, axis=1)
    self.y_pred_target = T.argmax(self.EC_y_T_pi, axis=1)
def __init__(self, rng, input_source, input_target, label_source, batch_size,
             struct, coef, train=False, init_params=None):
    """Build a plain encoder + classifier with an MMD domain penalty.

    The cost is source-domain cross-entropy plus beta * MMD between the
    source and target encodings, i.e. a (non-variational) domain-adaptive
    classifier baseline.

    :type rng: numpy.random.RandomState
    :param rng: a random number generator used to initialize weights
    :type input_source: theano.tensor.TensorType
    :param input_source: symbolic "Source Domain" minibatch
    :type input_target: theano.tensor.TensorType
    :param input_target: symbolic "Target Domain" minibatch
    :param label_source: symbolic one-hot labels of the source minibatch
    :param batch_size: batch-size value passed through to the MMD estimator
    :type struct: class NN_struct
    :param struct: defines the encoder and classifier structures
    :param coef: hyper-parameter container (beta, D, optimize)
    :param train: if True, build the parameter-update rules
    :param init_params: optional pre-trained parameters (NN_params)
    """
    self.struct = struct
    encoder_struct = struct.encoder
    classifier_struct = struct.classifier

    beta = coef.beta          # MMD weight
    D = coef.D                # number of random features for fast MMD
    optimize = coef.optimize  # 'Adam_update' or 'SGD'

    # Fall back to freshly initialized parameters when none are supplied.
    if init_params is None:
        init_params = NN_params()

    #------------------------------------------------------------------------
    # Encoder Neural Network: shared feature extractor for both domains.
    self.Encoder = nn.NN_Block(rng=rng,
                               input_source=input_source,
                               input_target=input_target,
                               struct=encoder_struct,
                               params=init_params.EC_params,
                               name='Encoder')
    self.z_S = self.Encoder.output_source
    self.z_T = self.Encoder.output_target
    z_dim = encoder_struct.layer_dim[-1]

    self.Encoder_params = self.Encoder.params
    self.Encoder_learning_rate = self.Encoder.learning_rate
    self.Encoder_decay = self.Encoder.decay

    #------------------------------------------------------------------------
    # Classifier Neural Network: label posterior on top of the encoding.
    self.Classifier = nn.NN_Block(rng=rng,
                                  input_source=self.z_S,
                                  input_target=self.z_T,
                                  struct=classifier_struct,
                                  params=init_params.CF_params,
                                  name='Classifier')
    self.y_S = self.Classifier.output_source
    self.y_T = self.Classifier.output_target

    self.Classifier_params = self.Classifier.params
    self.Classifier_learning_rate = self.Classifier.learning_rate
    self.Classifier_decay = self.Classifier.decay

    #------------------------------------------------------------------------
    # Error Function Set
    # Classification loss, source data only (cross-entropy; threshold
    # guards the log against zero probabilities).
    threshold = 0.0000001
    self.Error_source = T.mean(
        T.sum(-label_source * T.log(T.maximum(self.y_S, threshold)), axis=1))

    # MMD between source and target encodings using a Gaussian kernel.
    #self.MMD = MMD(self.zy_S, self.zy_T, batch_size)
    self.MMD = er.MMDEstimator(rng, self.z_S, self.z_T, z_dim, batch_size, D)

    # Cost function
    self.cost = self.Error_source + self.MMD * beta

    # The parameters of the model, with per-parameter learning rate/decay.
    self.params = self.Encoder_params + self.Classifier_params
    self.learning_rate = self.Encoder_learning_rate + self.Classifier_learning_rate
    self.decay = self.Encoder_decay + self.Classifier_decay

    if optimize == 'Adam_update' and train:
        # Adam update function
        self.updates = nn.adam(loss=self.cost,
                               all_params=self.params,
                               all_learning_rate=self.learning_rate)
    elif optimize == 'SGD' and train:
        # Standard SGD with per-parameter learning rate and decay.
        gparams = [T.grad(self.cost, param) for param in self.params]
        self.params_updates = [
            (param, param - learning_rate * gparam)
            for param, gparam, learning_rate in zip(
                self.params, gparams, self.learning_rate)
        ]
        self.learning_rate_update = [
            (learning_rate, learning_rate * decay)
            for learning_rate, decay in zip(self.learning_rate, self.decay)
        ]
        self.updates = self.params_updates + self.learning_rate_update

    # keep track of model input
    self.input_source = input_source
    self.input_target = input_target

    # Predicted labels: argmax of the classifier outputs.
    self.y_pred_source = T.argmax(self.y_S, axis=1)
    self.y_pred_target = T.argmax(self.y_T, axis=1)
def __init__(self, rng, input_source, input_target, label_source, batch_size, struct, coef, train=False, init_params=None): """Initialize the parameters for the multilayer perceptron :type rng: numpy.random.RandomState :param rng: a random number generator used to initialize weights :type input_source: theano.tensor.TensorType :param input: symbolic variable that describes the "Source Domain" input of the architecture (one minibatch) :type input_target: theano.tensor.TensorType :param input: symbolic variable that describes the "Target Domain" input of the architecture (one minibatch) :type xxx_struct: class NN_struct :param xxx_strucat: define the structure of each NN """ if train == True: batch_size[0] = batch_size[0] * coef.L batch_size[1] = batch_size[1] * coef.L tmp_S = input_source tmp_T = input_target tmp_l = label_source for i in range(coef.L - 1): tmp_S = T.concatenate([tmp_S, input_source], axis=0) tmp_T = T.concatenate([tmp_T, input_target], axis=0) tmp_l = T.concatenate([tmp_l, label_source], axis=0) input_source = tmp_S input_target = tmp_T label_source = tmp_l L = coef.L else: L = 1 self.L = L self.struct = struct encoder1_struct = struct.encoder1 encoder2_struct = struct.encoder2 encoder3_struct = struct.encoder3 encoder4_struct = struct.encoder4 encoder5_struct = struct.encoder5 encoderX_struct = struct.encoderX decoder1_struct = struct.decoder1 decoder2_struct = struct.decoder2 decoder3_struct = struct.decoder3 alpha = coef.alpha beta = coef.beta chi = coef.chi gamma = coef.gamma D = coef.D optimize = coef.optimize if init_params == None: init_params = VLDF_params() #------------------------------------------------------------------------ #Encoder 1 Neural Network: present q_\phi({z_y}_n | x_n, d_n) zero_v_S = T.zeros([batch_size[0], 1], dtype=theano.config.floatX) zero_v_T = T.zeros([batch_size[1], 1], dtype=theano.config.floatX) one_v_S = T.ones([batch_size[0], 1], dtype=theano.config.floatX) one_v_T = T.ones([batch_size[1], 1], 
dtype=theano.config.floatX) d_source = T.concatenate([zero_v_S, one_v_S], axis=1) xd_source = T.concatenate([input_source, d_source], axis=1) d_target = T.concatenate([one_v_T, zero_v_T], axis=1) xd_target = T.concatenate([input_target, d_target], axis=1) self.Encoder1 = nn.Gaussian_MLP(rng=rng, input_source=xd_source, input_target=xd_target, struct=encoder1_struct, batch_size=batch_size, params=init_params.EC1_params, name='Encoder1') zy_dim = encoder1_struct.mu.layer_dim[-1] self.EC_zy_S_mu = self.Encoder1.S_mu self.EC_zy_S_log_sigma = self.Encoder1.S_log_sigma self.EC_zy_S_sigma = T.exp(self.EC_zy_S_log_sigma) self.EC_zy_T_mu = self.Encoder1.T_mu self.EC_zy_T_log_sigma = self.Encoder1.T_log_sigma self.EC_zy_T_sigma = T.exp(self.EC_zy_T_log_sigma) self.zy_S = self.Encoder1.S_output self.zy_T = self.Encoder1.T_output self.Encoder1_params = self.Encoder1.params self.Encoder1_learning_rate = self.Encoder1.learning_rate self.Encoder1_decay = self.Encoder1.decay #------------------------------------------------------------------------ #Encoder 5 Neural Network: present q_\phi(y_n | {z_y}_n) self.Encoder5_pi = nn.NN_Block(rng=rng, input_source=self.zy_S, input_target=self.zy_T, struct=encoder5_struct, params=init_params.EC5_params, name='Encoder5_pi') #Sample layer self.EC_5_CSL_target = nn.CatSampleLayer( pi=self.Encoder5_pi.output_target, n_in=encoder5_struct.layer_dim[-1], batch_size=batch_size[1]) y_dim = encoder5_struct.layer_dim[-1] self.EC_y_S_pi = self.Encoder5_pi.output_source self.EC_y_T_pi = self.Encoder5_pi.output_target self.y_T = self.EC_5_CSL_target.output self.Encoder5_params = self.Encoder5_pi.params self.Encoder5_learning_rate = self.Encoder5_pi.learning_rate self.Encoder5_decay = self.Encoder5_pi.decay #------------------------------------------------------------------------ #Encoder 3 Neural Network: present q_\phi({a_y}_n | {z_y}_n, y_n) #Input Append zyy_source = T.concatenate([self.zy_S, label_source], axis=1) zyy_target = 
T.concatenate([self.zy_T, self.y_T], axis=1)
        # --- continuation of the VFAE computation-graph construction ---
        # NOTE(review): this chunk begins mid-statement; the assignment target of
        # the T.concatenate(...) above (presumably zyy_target) is defined in the
        # preceding, unseen part of __init__ -- confirm against the full file.

        #------------------------------------------------------------------------
        # Encoder 3 Neural Network: q_\phi({a_y}_n | {z_y}_n, y_n)
        # Gaussian MLP over the concatenation of z_y and the (predicted) label y.
        self.Encoder3 = nn.Gaussian_MLP(rng=rng,
                                        input_source=zyy_source,
                                        input_target=zyy_target,
                                        struct=encoder3_struct,
                                        batch_size=batch_size,
                                        params=init_params.EC3_params,
                                        name='Encoder3')

        # dimensionality of a_y = width of the last layer of the mu branch
        ay_dim = encoder3_struct.mu.layer_dim[-1]

        # source-domain posterior parameters for a_y
        self.EC_ay_S_mu = self.Encoder3.S_mu
        self.EC_ay_S_log_sigma = self.Encoder3.S_log_sigma
        self.EC_ay_S_sigma = T.exp(self.EC_ay_S_log_sigma)

        # target-domain posterior parameters for a_y
        self.EC_ay_T_mu = self.Encoder3.T_mu
        self.EC_ay_T_log_sigma = self.Encoder3.T_log_sigma
        self.EC_ay_T_sigma = T.exp(self.EC_ay_T_log_sigma)

        # sampled a_y for source / target
        self.ay_S = self.Encoder3.S_output
        self.ay_T = self.Encoder3.T_output

        self.Encoder3_params = self.Encoder3.params
        self.Encoder3_learning_rate = self.Encoder3.learning_rate
        self.Encoder3_decay = self.Encoder3.decay

        #------------------------------------------------------------------------
        # Encoder 2 Neural Network: present q_\phi({z_d}_n | x_n, d_n)
        # Same [input, domain-one-hot] concatenations (xd_source / xd_target)
        # that fed Encoder 1.
        self.Encoder2 = nn.Gaussian_MLP(rng=rng,
                                        input_source=xd_source,
                                        input_target=xd_target,
                                        struct=encoder2_struct,
                                        batch_size=batch_size,
                                        params=init_params.EC2_params,
                                        name='Encoder2')

        # dimensionality of z_d = width of the last layer of the mu branch
        zd_dim = encoder2_struct.mu.layer_dim[-1]

        # source-domain posterior parameters for z_d
        self.EC_zd_S_mu = self.Encoder2.S_mu
        self.EC_zd_S_log_sigma = self.Encoder2.S_log_sigma
        self.EC_zd_S_sigma = T.exp(self.EC_zd_S_log_sigma)

        # target-domain posterior parameters for z_d
        self.EC_zd_T_mu = self.Encoder2.T_mu
        self.EC_zd_T_log_sigma = self.Encoder2.T_log_sigma
        self.EC_zd_T_sigma = T.exp(self.EC_zd_T_log_sigma)

        # sampled z_d for source / target
        self.zd_S = self.Encoder2.S_output
        self.zd_T = self.Encoder2.T_output

        self.Encoder2_params = self.Encoder2.params
        self.Encoder2_learning_rate = self.Encoder2.learning_rate
        self.Encoder2_decay = self.Encoder2.decay

        #------------------------------------------------------------------------
        # Encoder X Neural Network: present q_\phi(d_n | {z_d}_n)
        # NOTE(review): encoderX_struct is not unpacked from `struct` in the
        # visible part of __init__ -- confirm it is defined earlier in the file.
        self.EncoderX_pi = nn.NN_Block(rng=rng,
                                       input_source=self.zd_S,
                                       input_target=self.zd_T,
                                       struct=encoderX_struct,
                                       params=init_params.ECX_params,
                                       name='EncoderX_pi')

        # domain-label posterior pi -- presumably softmax probabilities over
        # the two domains; confirm against NN_Block's output activation.
        self.EC_d_S_pi = self.EncoderX_pi.output_source
        self.EC_d_T_pi = self.EncoderX_pi.output_target

        self.EncoderX_params = self.EncoderX_pi.params
        self.EncoderX_learning_rate = self.EncoderX_pi.learning_rate
        self.EncoderX_decay = self.EncoderX_pi.decay

        #------------------------------------------------------------------------
        # Encoder 4 Neural Network: present q_\phi({a_d}_n | {z_d}_n, d_n)
        # NOTE(review): encoder4_struct is not unpacked from `struct` in the
        # visible part of __init__ -- confirm it is defined earlier in the file.
        # Input Append: concatenate z_d with the domain one-hot d
        zdd_source = T.concatenate([self.zd_S, d_source], axis=1)
        zdd_target = T.concatenate([self.zd_T, d_target], axis=1)

        self.Encoder4 = nn.Gaussian_MLP(rng=rng,
                                        input_source=zdd_source,
                                        input_target=zdd_target,
                                        struct=encoder4_struct,
                                        batch_size=batch_size,
                                        params=init_params.EC4_params,
                                        name='Encoder4')

        # dimensionality of a_d = width of the last layer of the mu branch
        ad_dim = encoder4_struct.mu.layer_dim[-1]

        # source-domain posterior parameters for a_d
        self.EC_ad_S_mu = self.Encoder4.S_mu
        self.EC_ad_S_log_sigma = self.Encoder4.S_log_sigma
        self.EC_ad_S_sigma = T.exp(self.EC_ad_S_log_sigma)

        # target-domain posterior parameters for a_d
        self.EC_ad_T_mu = self.Encoder4.T_mu
        self.EC_ad_T_log_sigma = self.Encoder4.T_log_sigma
        self.EC_ad_T_sigma = T.exp(self.EC_ad_T_log_sigma)

        # sampled a_d for source / target
        self.ad_S = self.Encoder4.S_output
        self.ad_T = self.Encoder4.T_output

        self.Encoder4_params = self.Encoder4.params
        self.Encoder4_learning_rate = self.Encoder4.learning_rate
        self.Encoder4_decay = self.Encoder4.decay

        #------------------------------------------------------------------------
        # Decoder 1 Neural Network: present p_\theta(x_n | {z_y}_n, {z_d}_n)
        # Reconstructs the input from the concatenation of both latent codes.
        zyzd_source = T.concatenate([self.zy_S, self.zd_S], axis=1)
        zyzd_target = T.concatenate([self.zy_T, self.zd_T], axis=1)

        self.Decoder1 = nn.Gaussian_MLP(rng=rng,
                                        input_source=zyzd_source,
                                        input_target=zyzd_target,
                                        struct=decoder1_struct,
                                        batch_size=batch_size,
                                        params=init_params.DC1_params,
                                        name='Decoder1')

        # dimensionality of the reconstructed x
        x_dim = decoder1_struct.mu.layer_dim[-1]

        # Gaussian parameters of the reconstruction p(x|...), source domain
        self.DC_x_S_mu = self.Decoder1.S_mu
        self.DC_x_S_log_sigma = self.Decoder1.S_log_sigma
        self.DC_x_S_sigma = T.exp(self.DC_x_S_log_sigma)

        # Gaussian parameters of the reconstruction p(x|...), target domain
        self.DC_x_T_mu = self.Decoder1.T_mu
        self.DC_x_T_log_sigma = self.Decoder1.T_log_sigma
        self.DC_x_T_sigma = T.exp(self.DC_x_T_log_sigma)

        self.Decoder1_params = self.Decoder1.params
        self.Decoder1_learning_rate = self.Decoder1.learning_rate
        self.Decoder1_decay = self.Decoder1.decay

        #------------------------------------------------------------------------
        # Decoder 2 Neural Network: present p_\theta({z_y}_n | {a_y}_n, y_n)
        # Source uses the true labels; target uses the inferred y_T.
        ayy_source = T.concatenate([self.ay_S, label_source], axis=1)
        ayy_target = T.concatenate([self.ay_T, self.y_T], axis=1)

        self.Decoder2 = nn.Gaussian_MLP(rng=rng,
                                        input_source=ayy_source,
                                        input_target=ayy_target,
                                        struct=decoder2_struct,
                                        batch_size=batch_size,
                                        params=init_params.DC2_params,
                                        name='Decoder2')

        # prior parameters for z_y, source domain
        self.DC_zy_S_mu = self.Decoder2.S_mu
        self.DC_zy_S_log_sigma = self.Decoder2.S_log_sigma
        self.DC_zy_S_sigma = T.exp(self.DC_zy_S_log_sigma)

        # prior parameters for z_y, target domain
        self.DC_zy_T_mu = self.Decoder2.T_mu
        self.DC_zy_T_log_sigma = self.Decoder2.T_log_sigma
        self.DC_zy_T_sigma = T.exp(self.DC_zy_T_log_sigma)

        self.Decoder2_params = self.Decoder2.params
        self.Decoder2_learning_rate = self.Decoder2.learning_rate
        self.Decoder2_decay = self.Decoder2.decay

        #------------------------------------------------------------------------
        # Decoder 3 Neural Network: present p_\theta({z_d}_n | {a_d}_n, d_n)
        # NOTE(review): decoder3_struct is not unpacked from `struct` in the
        # visible part of __init__ -- confirm it is defined earlier in the file.
        add_source = T.concatenate([self.ad_S, d_source], axis=1)
        add_target = T.concatenate([self.ad_T, d_target], axis=1)

        self.Decoder3 = nn.Gaussian_MLP(rng=rng,
                                        input_source=add_source,
                                        input_target=add_target,
                                        struct=decoder3_struct,
                                        batch_size=batch_size,
                                        params=init_params.DC3_params,
                                        name='Decoder3')

        # prior parameters for z_d, source domain
        self.DC_zd_S_mu = self.Decoder3.S_mu
        self.DC_zd_S_log_sigma = self.Decoder3.S_log_sigma
        self.DC_zd_S_sigma = T.exp(self.DC_zd_S_log_sigma)

        # prior parameters for z_d, target domain
        self.DC_zd_T_mu = self.Decoder3.T_mu
        self.DC_zd_T_log_sigma = self.Decoder3.T_log_sigma
        self.DC_zd_T_sigma = T.exp(self.DC_zd_T_log_sigma)

        self.Decoder3_params = self.Decoder3.params
        self.Decoder3_learning_rate = self.Decoder3.learning_rate
        self.Decoder3_decay = self.Decoder3.decay

        #------------------------------------------------------------------------
        # Error Function Set

        # KL(q(zy)||p(zy)) -----------
        self.KL_zy_source = er.KLGaussianGaussian(
            self.EC_zy_S_mu, self.EC_zy_S_log_sigma,
            self.DC_zy_S_mu, self.DC_zy_S_log_sigma).sum()
        self.KL_zy_target = er.KLGaussianGaussian(
            self.EC_zy_T_mu, self.EC_zy_T_log_sigma,
            self.DC_zy_T_mu, self.DC_zy_T_log_sigma).sum()

        # KL(q(zd)||p(zd)) -----------
        self.KL_zd_source = er.KLGaussianGaussian(
            self.EC_zd_S_mu, self.EC_zd_S_log_sigma,
            self.DC_zd_S_mu, self.DC_zd_S_log_sigma).sum()
        self.KL_zd_target = er.KLGaussianGaussian(
            self.EC_zd_T_mu, self.EC_zd_T_log_sigma,
            self.DC_zd_T_mu, self.DC_zd_T_log_sigma).sum()

        # KL(q(ay)||p(ay)) -- auxiliary latents have standard-normal priors ---
        self.KL_ay_source = er.KLGaussianStdGaussian(
            self.EC_ay_S_mu, self.EC_ay_S_log_sigma).sum()
        self.KL_ay_target = er.KLGaussianStdGaussian(
            self.EC_ay_T_mu, self.EC_ay_T_log_sigma).sum()

        # KL(q(ad)||p(ad)) -----------
        self.KL_ad_source = er.KLGaussianStdGaussian(
            self.EC_ad_S_mu, self.EC_ad_S_log_sigma).sum()
        self.KL_ad_target = er.KLGaussianStdGaussian(
            self.EC_ad_T_mu, self.EC_ad_T_log_sigma).sum()

        # KL(q(y)||p(y)), only target data -----------
        # prior of y is set to 1/K, K is category number
        # `threshold` clips log arguments away from 0 for numerical stability.
        threshold = 0.0000001
        pi_0 = T.ones([batch_size[1], y_dim], dtype=theano.config.floatX) / y_dim
        # NOTE(review): this is -sum(q * log(q/p)), i.e. the NEGATIVE of
        # KL(q||p) despite the name -- confirm the sign convention matches
        # er.KLGaussianGaussian before changing anything.
        self.KL_y_target = T.sum(
            -self.EC_y_T_pi * T.log(T.maximum(self.EC_y_T_pi / pi_0, threshold)),
            axis=1).sum()

        # Likelihood q(y), only source data (cross-entropy vs. true labels) ---
        self.LH_y_source = -T.sum(
            -label_source * T.log(T.maximum(self.EC_y_S_pi, threshold)),
            axis=1).sum()
        #self.LH_y_source = T.nnet.nnet.categorical_crossentropy(self.EC_y_S_pi, label_source)

        # Likelihood p(x) (Gaussian observation model) -----------
        self.LH_x_source = er.LogGaussianPDF(input_source,
                                             self.DC_x_S_mu,
                                             self.DC_x_S_log_sigma).sum()
        self.LH_x_target = er.LogGaussianPDF(input_target,
                                             self.DC_x_T_mu,
                                             self.DC_x_T_log_sigma).sum()

        # Likelihood q(d|z_d) (cross-entropy vs. the known domain one-hot) ----
        self.LH_d_source = -T.sum(
            -d_source * T.log(T.maximum(self.EC_d_S_pi, threshold)),
            axis=1).sum()
        self.LH_d_target = -T.sum(
            -d_target * T.log(T.maximum(self.EC_d_T_pi, threshold)),
            axis=1).sum()

        # MMD between source and target z_y using a Gaussian kernel -----------
        # D = number of random features for the fast MMD estimator.
        #self.MMD = MMD(self.zy_S, self.zy_T, batch_size)
        self.MMD = er.MMDEstimator(rng, self.zy_S, self.zy_T,
                                   zy_dim, batch_size, D).sum()
        #self.zy = T.concatenate([self.zy_S, self.zy_T], axis=0)
        #self.zd = T.concatenate([self.zd_S, self.zd_T], axis=0)
        #self.MMD = er.MMDEstimator(rng, self.zy, self.zd, zy_dim, batch_size*2, D)

        # Cost function: negative ELBO-style objective, averaged over the
        # combined batch, plus the weighted MMD penalty.
        # NOTE(review): `gamma` (weight of the domain-likelihood terms) is not
        # defined in the visible part of __init__ (only alpha, beta, chi, D,
        # optimize are unpacked from coef) -- confirm it exists or this raises
        # NameError at graph-construction time.
        tmp = self.KL_zy_source + self.KL_zy_target + self.KL_ay_source + self.KL_ay_target \
            + self.KL_zd_source + self.KL_zd_target + self.KL_ad_source + self.KL_ad_target \
            + self.LH_x_source * chi + self.LH_x_target * chi + self.KL_y_target + self.LH_y_source * alpha \
            + self.LH_d_source * gamma + self.LH_d_target * gamma
        self.cost = -tmp / (batch_size[0] + batch_size[1]) + self.MMD * beta

        # the parameters of the model
        # NOTE(review): self.Encoder5_params / _learning_rate / _decay are
        # referenced below, but no Encoder5 is constructed in the visible code
        # -- likely a leftover from a five-encoder variant; this will raise
        # AttributeError unless Encoder5 is created elsewhere. Confirm.
        self.params = self.Encoder1_params + self.Encoder2_params + self.Encoder3_params \
            + self.Encoder4_params + self.Encoder5_params + self.EncoderX_params \
            + self.Decoder1_params + self.Decoder2_params + self.Decoder3_params
        self.learning_rate = self.Encoder1_learning_rate + self.Encoder2_learning_rate + self.Encoder3_learning_rate \
            + self.Encoder4_learning_rate + self.Encoder5_learning_rate + self.EncoderX_learning_rate \
            + self.Decoder1_learning_rate + self.Decoder2_learning_rate + self.Decoder3_learning_rate
        self.decay = self.Encoder1_decay + self.Encoder2_decay + self.Encoder3_decay \
            + self.Encoder4_decay + self.Encoder5_decay + self.EncoderX_decay \
            + self.Decoder1_decay + self.Decoder2_decay + self.Decoder3_decay

        if optimize == 'Adam_update':
            # Adam update function
            self.updates = nn.adam(loss=self.cost,
                                   all_params=self.params,
                                   all_learning_rate=self.learning_rate)
        elif optimize == 'SGD':
            # Standard SGD: per-parameter gradient step plus multiplicative
            # learning-rate decay applied on every update.
            gparams = [T.grad(self.cost, param) for param in self.params]
            self.params_updates = [
                (param, param - learning_rate * gparam)
                for param, gparam, learning_rate in zip(
                    self.params, gparams, self.learning_rate)
            ]
            self.learning_rate_update = [
                (learning_rate, learning_rate * decay)
                for learning_rate, decay in zip(self.learning_rate, self.decay)
            ]
            self.updates = self.params_updates + self.learning_rate_update

        # keep track of model input
        self.input_source = input_source
        self.input_target = input_target

        # Predict Label: argmax over the class posterior pi
        self.y_pred_source = T.argmax(self.EC_y_S_pi, axis=1)
        self.y_pred_target = T.argmax(self.EC_y_T_pi, axis=1)