# Learning-rate schedule: decay from lr_start to lr_end over train_epochs epochs.
train_epochs = 500
test_epochs = 1
lr_start = 0.003
lr_end = 0.0000003
lr_decay = (lr_end / lr_start) ** (1. / train_epochs)

global_step1 = tf.Variable(0, trainable=False)
global_step2 = tf.Variable(0, trainable=False)

# One decay step per epoch (batch size 100).
lr1 = tf.train.exponential_decay(lr_start, global_step=global_step1,
                                 decay_steps=int(mnist.train.images.shape[0] / 100),
                                 decay_rate=lr_decay)
lr2 = tf.train.exponential_decay(lr_start, global_step=global_step2,
                                 decay_steps=int(mnist.train.images.shape[0] / 100),
                                 decay_rate=lr_decay)

sess = tf.Session()
saver = tf.train.Saver()
# saver.restore(sess, "model/model.ckpt")

# Binarized kernels get the scaled Adam optimizer from binary_layer;
# every other trainable variable uses plain Adam.
other_var = [var for var in tf.trainable_variables()
             if not var.name.endswith('kernel:0')]

opt = binary_layer.AdamOptimizer(binary_layer.get_all_LR_scale(), lr1)
opt2 = tf.train.AdamOptimizer(lr2)

update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(update_ops):
    # When training, the moving_mean and moving_variance in the BN layers
    # need to be updated.
    train_kernel_op = opt.apply_gradients(binary_layer.compute_grads(loss, opt),
                                          global_step=global_step1)
    train_other_op = opt2.minimize(loss, var_list=other_var,
                                   global_step=global_step2)

accuracy = tf.reduce_mean(
    tf.cast(tf.equal(tf.argmax(layer4, 1), tf.argmax(target, 1)), tf.float32))

sess.run(tf.global_variables_initializer())
print("Training started.....")

# ===== training =====
old_acc = 0.0
X_train, y_train = shuffle(mnist.train.images, mnist.train.labels)
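# Hedged sketch of the epoch loop that would follow; not from the original
# file. `data` is assumed to be the network's input placeholder (`target` is
# the label placeholder used in `accuracy` above), and `shuffle` is assumed
# to be sklearn.utils.shuffle. Batch size 100 matches decay_steps above.
batch_size = 100
for epoch in range(train_epochs):
    X_train, y_train = shuffle(X_train, y_train)
    for i in range(0, X_train.shape[0], batch_size):
        xb, yb = X_train[i:i + batch_size], y_train[i:i + batch_size]
        # One step updates both the binarized kernels and the other variables.
        sess.run([train_kernel_op, train_other_op],
                 feed_dict={data: xb, target: yb})
    acc = sess.run(accuracy, feed_dict={data: mnist.test.images,
                                        target: mnist.test.labels})
    if acc > old_acc:  # keep the best checkpoint so far
        old_acc = acc
        saver.save(sess, "model/model.ckpt")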
class VAE(object):
    """Beta Variational Auto Encoder."""

    def __init__(self, gamma=2.35, capacity_limit=25.0,
                 capacity_change_duration=100000, learning_rate=1e-4,
                 n3=32, k3=16, r3=16, EbN0dB=-2, EbN0dBb=10):
        self.learning_rate = learning_rate
        # with tf.variable_scope("gen", reuse=reuse) as scope:

        # Code parameters: n3 channel uses, k3 message bits, r3 random bits.
        self.n3 = n3
        self.k3 = k3
        self.r3 = r3
        self.rate = self.k3 / self.n3

        # Noise levels from Eb/N0 in dB: EbN0dB for one channel, EbN0dBb for the other.
        self.EbN0 = 10 ** (0.1 * EbN0dB)
        self.N0 = 1 / (self.EbN0 * self.rate)
        self.EbN0b = 10 ** (0.1 * EbN0dBb)
        self.N0b = 1 / (self.EbN0b * self.rate)

        self.x_M = tf.placeholder(tf.float32, shape=(None, self.k3))  # message bits
        self.x_R = tf.placeholder(tf.float32, shape=(None, self.r3))  # random bits
        self.PermutationIndices = tf.placeholder(tf.int32, (None,))
        self.n_1 = tf.placeholder(tf.float32, shape=(None, self.n3))  # noise draw 1
        self.n_2 = tf.placeholder(tf.float32, shape=(None, self.n3))  # noise draw 2
        self.c_sum = tf.placeholder(tf.float32, shape=(None, self.n3))
        self.c_R_data = tf.placeholder(tf.float32, shape=(None, self.n3))
        self.c_M_data = tf.placeholder(tf.float32, shape=(None, self.n3))

        # Create autoencoder network
        # self._create_network()

        # Define loss function and corresponding optimizer
        self._create_loss_optimizer()

    def setLearningRate(self):
        self.learning_rate = self.learning_rate / 2

    def _fc_weight_variable(self, weight_shape, name):
        name_w = "W_{0}".format(name)
        name_b = "b_{0}".format(name)
        input_channels = weight_shape[0]
        output_channels = weight_shape[1]
        d = 1.0 / np.sqrt(input_channels)  # unused; fc_initializer handles scaling
        bias_shape = [output_channels]
        weight = tf.get_variable(name_w, weight_shape,
                                 initializer=fc_initializer(input_channels))
        bias = tf.get_variable(name_b, bias_shape,
                               initializer=fc_initializer(input_channels))
        return weight, bias

    def MINE(self, XJointOrMarg, reusee):
        with tf.variable_scope("MINE1", reuse=reusee):
            self.W_fcm1, self.b_fcm1 = self._fc_weight_variable([self.n3 + self.k3, 500], "fcm1")
            self.W_fcm2, self.b_fcm2 = self._fc_weight_variable([500, 500], "fcm2")
            self.W_fcm3, self.b_fcm3 = self._fc_weight_variable([500, 500], "fcm3")
            self.W_fcm3b, self.b_fcm3b = self._fc_weight_variable([500, 500], "fcm3b")
            self.W_fcm4, self.b_fcm4 = self._fc_weight_variable([500, 1], "fcm4")
            h_fc1 = tf.nn.relu(tf.matmul(XJointOrMarg, self.W_fcm1) + self.b_fcm1)
            h_fc2 = tf.nn.relu(tf.matmul(h_fc1, self.W_fcm2) + self.b_fcm2)
            h_fc3 = tf.nn.relu(tf.matmul(h_fc2, self.W_fcm3) + self.b_fcm3)
            h_fc3b = tf.nn.relu(tf.matmul(h_fc3, self.W_fcm3b) + self.b_fcm3b)
            h_fc4 = tf.matmul(h_fc3b, self.W_fcm4) + self.b_fcm4
        return h_fc4

    def MINEk(self, XJointOrMarg, reusee):
        with tf.variable_scope("MINEk1", reuse=reusee):
            self.W_fcm1, self.b_fcm1 = self._fc_weight_variable([self.n3 + self.k3, 500], "fcm1")
            self.W_fcm2, self.b_fcm2 = self._fc_weight_variable([500, 500], "fcm2")
            self.W_fcm3, self.b_fcm3 = self._fc_weight_variable([500, 500], "fcm3")
            self.W_fcm3b, self.b_fcm3b = self._fc_weight_variable([500, 500], "fcm3b")
            self.W_fcm4, self.b_fcm4 = self._fc_weight_variable([500, 1], "fcm4")
            h_fc1 = tf.nn.relu(tf.matmul(XJointOrMarg, self.W_fcm1) + self.b_fcm1)
            h_fc2 = tf.nn.relu(tf.matmul(h_fc1, self.W_fcm2) + self.b_fcm2)
            h_fc3 = tf.nn.relu(tf.matmul(h_fc2, self.W_fcm3) + self.b_fcm3)
            h_fc3b = tf.nn.relu(tf.matmul(h_fc3, self.W_fcm3b) + self.b_fcm3b)
            h_fc4 = tf.matmul(h_fc3b, self.W_fcm4) + self.b_fcm4
        return h_fc4
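    # The three critics MINE, MINEk, and MINEr share the same MLP design but
    # live in separate variable scopes. Reading _create_loss_optimizer below,
    # they are fed (joint, shuffled-marginal) pairs to estimate, respectively,
    # I(y3; x_M) between the noisy observation and the message, I(c3; x_M)
    # between the codeword and the message, and I(c3; x_R) between the
    # codeword and the randomness.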
    def MINEr(self, XJointOrMarg, reusee):
        with tf.variable_scope("MINEr1", reuse=reusee):
            self.W_fcm1, self.b_fcm1 = self._fc_weight_variable([self.n3 + self.r3, 500], "fcm1")
            self.W_fcm2, self.b_fcm2 = self._fc_weight_variable([500, 500], "fcm2")
            self.W_fcm3, self.b_fcm3 = self._fc_weight_variable([500, 500], "fcm3")
            self.W_fcm3b, self.b_fcm3b = self._fc_weight_variable([500, 500], "fcm3b")
            self.W_fcm3c, self.b_fcm3c = self._fc_weight_variable([500, 500], "fcm3c")
            self.W_fcm3d, self.b_fcm3d = self._fc_weight_variable([500, 100], "fcm3d")
            self.W_fcm4, self.b_fcm4 = self._fc_weight_variable([100, 1], "fcm4")
            h_fc1 = tf.nn.relu(tf.matmul(XJointOrMarg, self.W_fcm1) + self.b_fcm1)
            h_fc2 = tf.nn.relu(tf.matmul(h_fc1, self.W_fcm2) + self.b_fcm2)
            h_fc3 = tf.nn.relu(tf.matmul(h_fc2, self.W_fcm3) + self.b_fcm3)
            h_fc3b = tf.nn.relu(tf.matmul(h_fc3, self.W_fcm3b) + self.b_fcm3b)
            h_fc3c = tf.nn.relu(tf.matmul(h_fc3b, self.W_fcm3c) + self.b_fcm3c)
            h_fc3d = tf.nn.relu(tf.matmul(h_fc3c, self.W_fcm3d) + self.b_fcm3d)
            h_fc4 = tf.matmul(h_fc3d, self.W_fcm4) + self.b_fcm4
        return h_fc4

    def binary2_activation(self, x):
        # Hard threshold at 0.5: -1 where x < 0.5, +1 elsewhere.
        cond = tf.less(x, 0.5 * tf.ones(tf.shape(x)))
        out = tf.where(cond, -tf.ones(tf.shape(x)), tf.ones(tf.shape(x)))
        return out

    def H_encoder(self, x1, x2, reusee):
        # Encoder: maps (message x1, randomness x2) to an n3-bit codeword.
        with tf.variable_scope('H_encoder', reuse=reusee):
            # Message branch.
            self.W_fcm1, self.b_fcm1 = self._fc_weight_variable([self.k3, 500], "fcm1")
            self.W_fcm2, self.b_fcm2 = self._fc_weight_variable([500, 500], "fcm2")
            self.W_fcm3, self.b_fcm3 = self._fc_weight_variable([500, 500], "fcm3")
            self.W_fcm4, self.b_fcm4 = self._fc_weight_variable([500, 256], "fcm4")
            # Randomness branch.
            self.W_fcm1W, self.b_fcm1W = self._fc_weight_variable([self.r3, 500], "fcm1W")
            self.W_fcm2W, self.b_fcm2W = self._fc_weight_variable([500, 500], "fcm2W")
            self.W_fcm3W, self.b_fcm3W = self._fc_weight_variable([500, 500], "fcm3W")
            self.W_fcm4W, self.b_fcm4W = self._fc_weight_variable([500, 256], "fcm4W")
            self.W_fcm9W, self.b_fcm9W = self._fc_weight_variable([1024, self.n3], "fcm9W")

            jjk = x1  # tf.concat([x1, x2], 1)
            h_fc1 = tf.nn.relu(tf.matmul(jjk, self.W_fcm1) + self.b_fcm1)
            h_fc2 = tf.nn.relu(tf.matmul(h_fc1, self.W_fcm2) + self.b_fcm2)
            h_fc3 = tf.nn.relu(tf.matmul(h_fc2, self.W_fcm3) + self.b_fcm3)
            h_fc4 = tf.nn.tanh(tf.matmul(h_fc3, self.W_fcm4) + self.b_fcm4)

            jjkW = x2
            h_fc1W = tf.nn.relu(tf.matmul(jjkW, self.W_fcm1W) + self.b_fcm1W)
            h_fc2W = tf.nn.relu(tf.matmul(h_fc1W, self.W_fcm2W) + self.b_fcm2W)
            h_fc3W = tf.nn.relu(tf.matmul(h_fc2W, self.W_fcm3W) + self.b_fcm3W)
            h_fc4W = tf.nn.tanh(tf.matmul(h_fc3W, self.W_fcm4W) + self.b_fcm4W)

            # Combine the two branches, then refine with a small 1-D CNN.
            h_fc4V = h_fc4 - 40 * h_fc4W
            input_layer = tf.reshape(h_fc4V, [-1, 32, 8])  # 256 = 32 x 8
            conv1 = tf.layers.conv1d(inputs=input_layer, filters=16, kernel_size=4,
                                     padding="same", activation=tf.nn.relu)
            conv2 = tf.layers.conv1d(inputs=conv1, filters=32, kernel_size=4,
                                     padding="same", activation=tf.nn.relu)
            cnn = tf.layers.flatten(conv2)  # 32 positions x 32 filters = 1024
            # Binarize the codeword with a straight-through tanh unit.
            h_fc6 = binary_tanh_unit(tf.matmul(cnn, self.W_fcm9W) + self.b_fcm9W)
        return h_fc6

    def B_decoder(self, x, reusee):
        # Decoder that recovers both the message and the randomness.
        with tf.variable_scope('B_decoder', reuse=reusee):
            self.W_fcm1, self.b_fcm1 = self._fc_weight_variable([self.n3, 500], "fcm1")
            self.W_fcm2, self.b_fcm2 = self._fc_weight_variable([500, 500], "fcm2")
            self.W_fcm3, self.b_fcm3 = self._fc_weight_variable([500, 500], "fcm3")
            self.W_fcm3b, self.b_fcm3b = self._fc_weight_variable([500, 500], "fcm3b")
            self.W_fcm4, self.b_fcm4 = self._fc_weight_variable([500, self.k3], "fcm4")
            self.W_fcm4X, self.b_fcm4X = self._fc_weight_variable([500, self.r3], "fcm4X")
            h_fc1 = tf.nn.relu(tf.matmul(x, self.W_fcm1) + self.b_fcm1)
            h_fc2 = tf.nn.relu(tf.matmul(h_fc1, self.W_fcm2) + self.b_fcm2)
            h_fc3 = tf.nn.relu(tf.matmul(h_fc2, self.W_fcm3) + self.b_fcm3)
            h_fc3b = tf.nn.relu(tf.matmul(h_fc3, self.W_fcm3b) + self.b_fcm3b)
            h_fc4 = tf.nn.sigmoid(tf.matmul(h_fc3b, self.W_fcm4) + self.b_fcm4)
            h_fc4X = tf.nn.sigmoid(tf.matmul(h_fc3b, self.W_fcm4X) + self.b_fcm4X)
        return h_fc4, h_fc4X
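    # binary_tanh_unit is defined elsewhere in this codebase. A hedged sketch
    # of one common BinaryNet-style straight-through implementation (hard
    # binarization forward, identity gradient backward) would be:
    #
    #     def binary_tanh_unit(x):
    #         hard = tf.clip_by_value((x + 1.) / 2., 0., 1.)  # hard sigmoid
    #         rounded = tf.round(hard)                        # {0, 1}
    #         st = hard + tf.stop_gradient(rounded - hard)    # straight-through
    #         return 2. * st - 1.                             # {-1, +1}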
    def B_decoder_for_random(self, x, reusee):
        # Auxiliary decoder that recovers only the randomness bits.
        with tf.variable_scope('B_decoder_for_random', reuse=reusee):
            self.W_fcm1, self.b_fcm1 = self._fc_weight_variable([self.n3, 500], "fcm1")
            self.W_fcm2, self.b_fcm2 = self._fc_weight_variable([500, 500], "fcm2")
            self.W_fcm3, self.b_fcm3 = self._fc_weight_variable([500, 500], "fcm3")
            self.W_fcm3b, self.b_fcm3b = self._fc_weight_variable([500, 500], "fcm3b")
            self.W_fcm4, self.b_fcm4 = self._fc_weight_variable([500, self.r3], "fcm4")
            h_fc1 = tf.nn.relu(tf.matmul(x, self.W_fcm1) + self.b_fcm1)
            h_fc2 = tf.nn.relu(tf.matmul(h_fc1, self.W_fcm2) + self.b_fcm2)
            h_fc3 = tf.nn.relu(tf.matmul(h_fc2, self.W_fcm3) + self.b_fcm3)
            h_fc3b = tf.nn.relu(tf.matmul(h_fc3, self.W_fcm3b) + self.b_fcm3b)
            h_fc4 = tf.nn.sigmoid(tf.matmul(h_fc3b, self.W_fcm4) + self.b_fcm4)
        return h_fc4

    def E_decoder(self, x, reusee):
        # Eavesdropper's decoder: tries to recover the message from y3.
        with tf.variable_scope('E_decoder', reuse=reusee):
            self.W_fcm1, self.b_fcm1 = self._fc_weight_variable([self.n3, 500], "fcm1")
            self.W_fcm2, self.b_fcm2 = self._fc_weight_variable([500, 500], "fcm2")
            self.W_fcm3, self.b_fcm3 = self._fc_weight_variable([500, 500], "fcm3")
            self.W_fcm3b, self.b_fcm3b = self._fc_weight_variable([500, 500], "fcm3b")
            self.W_fcm4, self.b_fcm4 = self._fc_weight_variable([500, self.k3], "fcm4")
            h_fc1 = tf.nn.relu(tf.matmul(x, self.W_fcm1) + self.b_fcm1)
            h_fc2 = tf.nn.relu(tf.matmul(h_fc1, self.W_fcm2) + self.b_fcm2)
            h_fc3 = tf.nn.relu(tf.matmul(h_fc2, self.W_fcm3) + self.b_fcm3)
            h_fc3b = tf.nn.relu(tf.matmul(h_fc3, self.W_fcm3b) + self.b_fcm3b)
            h_fc4 = tf.nn.sigmoid(tf.matmul(h_fc3b, self.W_fcm4) + self.b_fcm4)
        return h_fc4

    def _create_loss_optimizer(self):
        # Reconstruction loss
        self.lr1 = tf.placeholder(tf.float32, shape=[])

        # Encode the (message, randomness) pair into a codeword.
        self.c_M = self.H_encoder(self.x_M, self.x_R, False)
        self.c3 = self.c_M
        # Channel outputs: y3 with noise level N0, y3b with noise level N0b.
        self.y3 = self.c3 + np.sqrt(self.N0 / 2) * self.n_1
        self.y3b = self.c3 + np.sqrt(self.N0b / 2) * self.n_2

        # L1 reconstruction losses for the message and the randomness.
        decodedM, decodedR = self.B_decoder(self.y3b, reusee=False)
        self.ML1 = tf.reduce_mean(tf.reduce_sum(tf.abs(self.x_M - decodedM), axis=1))
        self.ML1_for_random = tf.reduce_mean(tf.reduce_sum(tf.abs(self.x_R - decodedR), axis=1))

        # Eve's decoder is trained separately, only on its own variables.
        EdecodedM = self.E_decoder(self.y3, reusee=False)
        self.Eveloss = tf.reduce_mean(tf.reduce_sum(tf.abs(self.x_M - EdecodedM), axis=1))
        E_decoder_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='E_decoder')
        self.EoptimizerRandom = tf.train.AdamOptimizer(self.lr1).minimize(
            self.Eveloss, var_list=E_decoder_vars)

        # Bit error rates after hard thresholding (bits mapped to {-1, +1}).
        self.BobBer = tf.reduce_mean(tf.to_float(
            tf.not_equal(2 * self.x_M - 1, self.binary2_activation(decodedM))))
        self.BobBer_for_random = tf.reduce_mean(tf.to_float(
            tf.not_equal(2 * self.x_R - 1, self.binary2_activation(decodedR))))
        self.EveBer = self.BobBer

        # MINE estimate of I(y3; x_M): joint pairs vs. batch-shuffled marginals.
        self.xMINE = self.y3
        self.yMINE = self.x_M
        self.XJoint = tf.concat([self.xMINE, self.yMINE], 1)
        self.xMINEshuff = tf.gather(self.xMINE, self.PermutationIndices)
        self.XMarg = tf.concat([self.xMINEshuff, self.yMINE], 1)
        y2_joint = self.MINE(self.XJoint, reusee=False)
        y2_marg = self.MINE(self.XMarg, reusee=True)
        aya2z = tf.reduce_mean(tf.exp(y2_marg))
        aya22z = tf.reduce_mean(y2_joint)
        MI = aya22z - tf.log(aya2z)
        self.MINEloss = -MI

        # MINE estimate of I(c3; x_M).
        self.xMINE = self.c3
        self.yMINE = self.x_M
        self.XJoint = tf.concat([self.xMINE, self.yMINE], 1)
        self.xMINEshuff = tf.gather(self.xMINE, self.PermutationIndices)
        self.XMarg = tf.concat([self.xMINEshuff, self.yMINE], 1)
        y2_joint = self.MINEk(self.XJoint, reusee=False)
        y2_marg = self.MINEk(self.XMarg, reusee=True)
        aya2z = tf.reduce_mean(tf.exp(y2_marg))
        aya22z = tf.reduce_mean(y2_joint)
        MI = aya22z - tf.log(aya2z)
        self.MINElossk = -MI
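        # Each MI estimate above and below is the Donsker-Varadhan bound
        # I(X;Y) >= E_joint[T(x,y)] - log E_marg[exp T(x,y)], where the
        # "marginal" pairs are formed by shuffling x within the batch via
        # PermutationIndices. Minimizing -MI trains the statistics network T
        # to tighten the bound.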
        # MINE estimate of I(c3; x_R).
        self.xMINE = self.c3
        self.yMINE = self.x_R
        self.XJoint = tf.concat([self.xMINE, self.yMINE], 1)
        self.xMINEshuff = tf.gather(self.xMINE, self.PermutationIndices)
        self.XMarg = tf.concat([self.xMINEshuff, self.yMINE], 1)
        y2_joint = self.MINEr(self.XJoint, reusee=False)
        y2_marg = self.MINEr(self.XMarg, reusee=True)
        aya2z = tf.reduce_mean(tf.exp(y2_marg))
        aya22z = tf.reduce_mean(y2_joint)
        MI = aya22z - tf.log(aya2z)
        self.MINElossr = -MI

        # Each statistics network is trained on its own variable set.
        MINE1_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='MINE1')
        MINEk1_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='MINEk1')
        MINEr1_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='MINEr1')
        self.optimizerMINE1 = tf.train.AdamOptimizer(learning_rate=self.lr1).minimize(
            self.MINEloss, var_list=MINE1_vars)
        self.optimizerMINEk1 = tf.train.AdamOptimizer(learning_rate=self.lr1).minimize(
            self.MINElossk, var_list=MINEk1_vars)
        self.optimizerMINEr1 = tf.train.AdamOptimizer(learning_rate=self.lr1 / 2).minimize(
            self.MINElossr, var_list=MINEr1_vars)

        self.ML2 = -self.MINEloss
        self.Mainloss = self.ML1 + self.ML1_for_random
        self.MainlossB = self.ML1_for_random
        self.Mainloss2 = tf.abs(self.ML2)

        def JJDOUBLE(xx1, xx2):
            # Combine a secrecy gradient xx1 with a reliability gradient xx2:
            # keep xx1's direction, but clip its norm to min(|xx1|, |xx2|).
            # If only the reliability gradient exists, use it unchanged.
            if (xx1 is None) and (xx2 is None):
                return None
            elif (xx1 is None) and (xx2 is not None):
                return xx2
            elif (xx1 is not None) and (xx2 is None):
                return None
            elif (xx1 is not None) and (xx2 is not None):
                return tf.minimum(tf.norm(xx1), tf.norm(xx2)) * (xx1 / tf.norm(xx1))  # xx2 + 0.25*xx1

        optT = binary2.AdamOptimizer(binary2.get_all_LR_scale(), self.lr1)
        optTB = binary2.AdamOptimizer(binary2.get_all_LR_scale(), self.lr1)
        opt2TB = tf.train.AdamOptimizer(self.lr1)
        dec_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='H_encoder')
        B_decoder_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='B_decoder')
        B_decoder_vars_for_random = tf.get_collection(
            tf.GraphKeys.TRAINABLE_VARIABLES, scope='B_decoder_for_random')
        optA = binary2.AdamOptimizer(binary2.get_all_LR_scale(), self.lr1)
        optA2 = tf.train.AdamOptimizer(self.lr1)
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        optimizerMessageVARS = [var for var in dec_vars]
        optimizerMessageVARSBinary = [var for var in dec_vars]

        # Gradients of the reliability loss (encoder + decoder variables) and
        # of the secrecy losses (encoder variables only).
        gvsBERMessage = opt2TB.compute_gradients(
            self.Mainloss, var_list=optimizerMessageVARS + B_decoder_vars)
        gvsMIk = opt2TB.compute_gradients(
            self.MINElossk + self.Mainloss2, var_list=optimizerMessageVARS)
        gvsTEST = opt2TB.compute_gradients(self.Mainloss, var_list=optimizerMessageVARS)
        gradTEST, varTEST = zip(*gvsTEST)
        self.wwgg = gradTEST
        gradgvsBER, vargvsBER = zip(*gvsBERMessage)
        gradgvsMIk, vargvsMIk = zip(*gvsMIk)
        # gradgvsMIr, vargvsMIr = zip(*gvsMIr)
        # Pad the shorter secrecy-gradient tuple with None so it aligns with
        # the full variable list (encoder variables come first in both lists).
        gradgvsMIk = gradgvsMIk + tuple([None] * (len(gradgvsBER) - len(gradgvsMIk)))
        assert len(gradgvsBER) == len(gradgvsMIk)  # == len(gradgvsMIr)
        wholeGradJJ = [JJDOUBLE(grad1, grad2)
                       for grad1, grad2 in zip(gradgvsMIk, gradgvsBER)]
        # print(wholeGradJJ)
        self.optimizerMessage = opt2TB.apply_gradients(zip(wholeGradJJ, vargvsBER))
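# Hedged usage sketch, not from the original file: one alternating training
# step for the graph above. Assumes the rest of the codebase provides
# fc_initializer, binary_tanh_unit, and the binary2 module; batch size and
# learning rate are illustrative placeholders.
model = VAE()
sess = tf.Session()
sess.run(tf.global_variables_initializer())

batch_size = 256
feed = {
    model.x_M: np.random.randint(0, 2, (batch_size, model.k3)).astype(np.float32),
    model.x_R: np.random.randint(0, 2, (batch_size, model.r3)).astype(np.float32),
    model.n_1: np.random.randn(batch_size, model.n3).astype(np.float32),
    model.n_2: np.random.randn(batch_size, model.n3).astype(np.float32),
    model.PermutationIndices: np.random.permutation(batch_size),
    model.lr1: 1e-4,
}
# 1) tighten the three MINE bounds, 2) update the encoder/decoder with the
# combined JJDOUBLE gradients, 3) let Eve's decoder adapt.
sess.run([model.optimizerMINE1, model.optimizerMINEk1, model.optimizerMINEr1],
         feed_dict=feed)
sess.run(model.optimizerMessage, feed_dict=feed)
sess.run(model.EoptimizerRandom, feed_dict=feed)
bob_ber = sess.run(model.BobBer, feed_dict=feed)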