def build_decoder(batch_size, inputenc, name="decoder"):
    with tf.variable_scope(name):
        g1 = fc(inputenc, 64, scope='dec_fc1', activation_fn=tf.nn.relu)
        g2 = fc(g1, 128, scope='dec_fc2', activation_fn=tf.nn.relu)
        g3 = fc(g2, 256, scope='dec_fc3', activation_fn=tf.nn.relu)
        x_hat = fc(g3, input_dim, scope='dec_fc4', activation_fn=tf.sigmoid)
        return x_hat
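# NOTE (assumption, not part of the original snippets): every function in this
# section calls a helper `fc` with scope/activation_fn/weights_initializer/
# biases_initializer keywords, which matches TF-Slim's fully connected layer.
# A minimal setup they appear to rely on would look like the sketch below;
# `input_dim` and `n_z` are module-level globals in the original code, and the
# concrete values 784 and 10 are taken from the comments in one of the VAE
# snippets further down.
import numpy as np
import tensorflow as tf
from tensorflow.contrib.layers import fully_connected as fc

input_dim = 784   # flattened 28x28 input (assumed)
n_z = 10          # latent dimension (assumed default)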
def _net(net, hidden_layer_size=16):
    net = fc(net, hidden_layer_size, activation_fn=tf.nn.sigmoid, scope='fc0',
             weights_initializer=tf.random_normal_initializer(
                 stddev=1 / np.sqrt(observation_size)))
    net = fc(net, action_size, activation_fn=tf.nn.softmax, scope='fc1',
             weights_initializer=tf.random_normal_initializer(
                 stddev=1 / np.sqrt(hidden_layer_size)))
    return net
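# Hypothetical use of the policy head above (a sketch; `observation_size` and
# `action_size` are assumed to be module-level globals, as in the snippet itself):
obs_ph = tf.placeholder(tf.float32, [None, observation_size], name='obs')
action_probs = _net(obs_ph)                                    # softmax over actions
sampled_action = tf.squeeze(
    tf.multinomial(tf.log(action_probs + 1e-10), num_samples=1), axis=1)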
def createNetworkDU(self, X, iStep):
    with tf.compat.v1.variable_scope("NetWork" + str(iStep),
                                     reuse=tf.compat.v1.AUTO_REUSE):
        fPrev = fc(X, int(self.layerSize[0]), scope='enc_fc1',
                   activation_fn=self.activation)
        for i in np.arange(len(self.layerSize) - 1):
            scopeName = 'enc_fc' + str(i + 2)
            f = fc(fPrev, int(self.layerSize[i + 1]), scope=scopeName,
                   activation_fn=self.activation)
            fPrev = f
        Z = fc(fPrev, self.d, scope='uPDu', activation_fn=None)
    return Z
def createNetwork(self, X, iStep, renormalizeFactor):
    with tf.compat.v1.variable_scope("NetWorkU" + str(iStep),
                                     reuse=tf.compat.v1.AUTO_REUSE):
        fPrev = fc(X, int(self.layerSize[0]), scope='enc_fc1',
                   activation_fn=self.activation)
        for i in np.arange(len(self.layerSize) - 1):
            scopeName = 'enc_fc' + str(i + 2)
            f = fc(fPrev, int(self.layerSize[i + 1]), scope=scopeName,
                   activation_fn=self.activation)
            fPrev = f
        UZ = fc(fPrev, 1, scope='UZ', activation_fn=None)
    return UZ[:, 0]
def createNetwork(self, t, x):
    time_and_X = tf.concat([t, x], axis=-1)
    with tf.variable_scope("NetWork", reuse=tf.AUTO_REUSE):
        fPrev = fc(time_and_X, self.layerSize[0], scope='enc_fc1',
                   activation_fn=self.activation)
        for i in np.arange(len(self.layerSize) - 1):
            scopeName = 'enc_fc' + str(i + 2)
            f = fc(fPrev, self.layerSize[i + 1], scope=scopeName,
                   activation_fn=self.activation)
            fPrev = f
        U = fc(fPrev, 1, scope='uPDu', activation_fn=None)
    return U
def build(self):
    # input
    self.x = tf.placeholder(name='x', dtype=tf.float32, shape=[None, input_dim])

    # encoder
    # slim.fc(input, output_dim, scope, act_fn)
    f1 = fc(self.x, 512, scope='enc_fc1', activation_fn=tf.nn.elu)
    f2 = fc(f1, 384, scope='enc_fc2', activation_fn=tf.nn.elu)
    f3 = fc(f2, 256, scope='enc_fc3', activation_fn=tf.nn.elu)
    self.z_mu = fc(f3, self.n_z, scope='enc_fc4_mu', activation_fn=None)
    # log(sigma^2)
    self.z_log_sigma_sq = fc(f3, self.n_z, scope='enc_fc4_sigma', activation_fn=None)

    # N(z_mu, z_sigma)
    eps = tf.random_normal(shape=tf.shape(self.z_log_sigma_sq),
                           mean=0, stddev=1, dtype=tf.float32)  # unit Gaussian
    self.z = self.z_mu + tf.sqrt(tf.exp(self.z_log_sigma_sq)) * eps  # recover sigma from log(sigma^2)

    # decoder
    g1 = fc(self.z, 256, scope='dec_fc1', activation_fn=tf.nn.elu)
    g2 = fc(g1, 384, scope='dec_fc2', activation_fn=tf.nn.elu)
    g3 = fc(g2, 512, scope='dec_fc3', activation_fn=tf.nn.elu)
    self.x_hat = fc(g3, input_dim, scope='dec_fc4', activation_fn=tf.sigmoid)  # sigmoid b/c one-hot encoded

    # losses
    # reconstruction loss, x <-> x_hat
    # H(x, x_hat) = -\Sigma x*log(x_hat) + (1-x)*log(1-x_hat)
    epsilon = 1e-10  # to prevent log(0)
    recon_loss = -tf.reduce_sum(
        self.x * tf.log(self.x_hat + epsilon) +
        (1 - self.x) * tf.log(1 - self.x_hat + epsilon),
        axis=1)

    # latent loss
    # KL divergence between the latent distribution and N(0, 1)
    latent_loss = -0.5 * tf.reduce_sum(
        1 + self.z_log_sigma_sq - tf.square(self.z_mu) - tf.exp(self.z_log_sigma_sq),
        axis=1)

    # total loss
    self.total_loss = tf.reduce_mean(recon_loss + latent_loss)

    # optimizer
    self.train_op = tf.train.AdamOptimizer(
        learning_rate=self.learning_rate).minimize(self.total_loss)
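For reference, the two loss terms above are the usual negative ELBO of a VAE with a diagonal-Gaussian posterior and a Bernoulli decoder; written out per example (a restatement of what the code computes, with \(\mu =\) `z_mu`, \(\log\sigma^2 =\) `z_log_sigma_sq`, \(\hat x =\) `x_hat`):

\[
\text{recon\_loss}(x) = -\sum_i \big[x_i \log \hat x_i + (1-x_i)\log(1-\hat x_i)\big],
\qquad
\text{latent\_loss}(x) = -\tfrac{1}{2} \sum_j \big(1 + \log\sigma_j^2 - \mu_j^2 - \sigma_j^2\big),
\]

and the objective minimized is the batch mean of their sum.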
def generator(self, z, reuse=False):
    with tf.variable_scope('generator') as scope:
        if reuse:
            scope.reuse_variables()
        w1 = tf.Variable(tf.random_normal(shape=[]))  # scalar variable, unused below
        g1 = fc(z, self.g_hidden_size, scope='gen_fc1', activation_fn=tf.nn.relu)
        g_log = fc(g1, self.img_dim, scope='gen_fc2', activation_fn=None)
        g2 = tf.nn.sigmoid(g_log)
        return g_log, g2
def createNetworkNotTrainable(self, X, iStep, weightInit, biasInit):
    with tf.compat.v1.variable_scope("NetWorkGamNotTrain" + "_" + str(iStep),
                                     reuse=False):
        cMinW = 0
        cMinB = 0
        fPrev = fc(X, int(self.layerSize[0]), scope='enc_fc1',
                   activation_fn=self.activation,
                   weights_initializer=tf.constant_initializer(
                       np.reshape(weightInit[:self.d * self.layerSize[0]],
                                  [self.d, self.layerSize[0]])),
                   biases_initializer=tf.constant_initializer(
                       biasInit[0:self.layerSize[0]]),
                   trainable=False)
        cMinW += self.d * self.layerSize[0]
        cMinB += self.layerSize[0]
        for i in np.arange(len(self.layerSize) - 1):
            scopeName = 'enc_fc' + str(i + 2)
            f = fc(fPrev, int(self.layerSize[i + 1]), scope=scopeName,
                   activation_fn=self.activation,
                   weights_initializer=tf.constant_initializer(
                       np.reshape(
                           weightInit[cMinW:cMinW + self.layerSize[i] * self.layerSize[i + 1]],
                           [self.layerSize[i], self.layerSize[i + 1]])),
                   biases_initializer=tf.constant_initializer(
                       biasInit[cMinB:cMinB + self.layerSize[i + 1]]),
                   trainable=False)
            cMinW += self.layerSize[i] * self.layerSize[i + 1]
            cMinB += self.layerSize[i + 1]
            fPrev = f
        # D2U -> d^2
        sizeFin = int(self.d * self.d)
        ZGam = fc(fPrev, sizeFin, scope='Gam', activation_fn=None,
                  weights_initializer=tf.constant_initializer(
                      np.reshape(
                          weightInit[cMinW:cMinW + self.layerSize[len(self.layerSize) - 1] * sizeFin],
                          [self.layerSize[len(self.layerSize) - 1], sizeFin])),
                  biases_initializer=tf.constant_initializer(
                      biasInit[cMinB:cMinB + sizeFin]),
                  trainable=False)
    return tf.reshape(ZGam, [tf.shape(X)[0], self.d, self.d])
def build(self):
    self.x = tf.placeholder(name='x', dtype=tf.float32, shape=[None, input_dim])

    n_hidden_f1 = 512
    n_hidden_f2 = 384
    n_hidden_f3 = 256

    # Encode
    # x -> z_mean, z_sigma -> z
    f1 = fc(self.x, n_hidden_f1, scope='enc_fc1', activation_fn=tf.nn.elu)  # AUTOREUSE
    f2 = fc(f1, n_hidden_f2, scope='enc_fc2', activation_fn=tf.nn.elu)
    f3 = fc(f2, n_hidden_f3, scope='enc_fc3', activation_fn=tf.nn.elu)
    self.z_mu = fc(f3, self.n_z, scope='enc_fc4_mu', activation_fn=None)
    self.z_log_sigma_sq = fc(f3, self.n_z, scope='enc_fc4_sigma', activation_fn=None)
    eps = tf.random_normal(shape=tf.shape(self.z_log_sigma_sq),
                           mean=0, stddev=1, dtype=tf.float32)
    self.z = self.z_mu + tf.sqrt(tf.exp(self.z_log_sigma_sq)) * eps

    # Decode
    # z -> x_hat
    g1 = fc(self.z, n_hidden_f3, scope='dec_fc1', activation_fn=tf.nn.elu)
    g2 = fc(g1, n_hidden_f2, scope='dec_fc2', activation_fn=tf.nn.elu)
    g3 = fc(g2, n_hidden_f1, scope='dec_fc3', activation_fn=tf.nn.elu)
    self.x_hat = fc(g3, input_dim, scope='dec_fc4', activation_fn=tf.sigmoid)

    # Loss
    # Reconstruction loss: minimize the cross-entropy loss
    # H(x, x_hat) = -\Sigma x*log(x_hat) + (1-x)*log(1-x_hat)
    epsilon = 1e-10
    recon_loss = -tf.reduce_sum(
        self.x * tf.log(epsilon + self.x_hat) +
        (1 - self.x) * tf.log(epsilon + 1 - self.x_hat),
        axis=1)
    self.recon_loss = tf.reduce_mean(recon_loss)

    # Latent loss
    # Kullback-Leibler divergence between the latent distribution and N(0, 1)
    latent_loss = -0.5 * tf.reduce_sum(
        1 + self.z_log_sigma_sq - tf.square(self.z_mu) - tf.exp(self.z_log_sigma_sq),
        axis=1)
    self.latent_loss = tf.reduce_mean(latent_loss)

    self.total_loss = tf.reduce_mean(recon_loss + latent_loss)
    self.train_op = tf.train.AdamOptimizer(
        learning_rate=self.learning_rate).minimize(self.total_loss)
    return
def build(self):
    self.x = tf.placeholder(name='x', dtype=tf.float32,
                            shape=[None, self.input_dim])

    # Encode
    # x -> z_mean, z_sigma -> z
    f1 = fc(self.x, 256, scope='enc_fc1', activation_fn=tf.nn.relu)
    f2 = fc(f1, 128, scope='enc_fc2', activation_fn=tf.nn.relu)
    f3 = fc(f2, 64, scope='enc_fc3', activation_fn=tf.nn.relu)
    self.z_mu = fc(f3, self.n_z, scope='enc_fc4_mu', activation_fn=None)
    self.z_log_sigma_sq = fc(f3, self.n_z, scope='enc_fc4_sigma', activation_fn=None)
    eps = tf.random_normal(shape=tf.shape(self.z_log_sigma_sq),
                           mean=0, stddev=1, dtype=tf.float32)
    self.z = self.z_mu + tf.sqrt(tf.exp(self.z_log_sigma_sq)) * eps

    # Decode
    # z -> x_hat
    g1 = fc(self.z, 64, scope='dec_fc1', activation_fn=tf.nn.relu)
    g2 = fc(g1, 128, scope='dec_fc2', activation_fn=tf.nn.relu)
    g3 = fc(g2, 256, scope='dec_fc3', activation_fn=tf.nn.relu)
    self.x_hat = fc(g3, self.input_dim, scope='dec_fc4', activation_fn=tf.sigmoid)

    # Loss
    # Reconstruction loss: mean-squared error
    self.recon_loss = tf.reduce_mean(
        tf.squared_difference(self.x, self.x_hat))

    # Latent loss
    # KL divergence between the latent distribution and N(0, 1)
    latent_loss = -0.5 * tf.reduce_sum(
        1 + self.z_log_sigma_sq - tf.square(self.z_mu) - tf.exp(self.z_log_sigma_sq),
        axis=1)
    self.latent_loss = tf.reduce_mean(latent_loss)

    self.total_loss = self.recon_loss + self.latent_loss
    self.train_op = tf.train.AdamOptimizer(
        learning_rate=self.learning_rate).minimize(self.total_loss)

    self.losses = {
        'recon_loss': self.recon_loss,
        'latent_loss': self.latent_loss,
        'total_loss': self.total_loss,
    }
    return
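# Hypothetical training driver for the model above (a minimal sketch: the class
# name `VAE`, its constructor arguments and the random stand-in data are
# assumptions; only model.x, model.train_op and model.losses come from the
# snippet itself):
model = VAE(input_dim=784, n_z=10, learning_rate=1e-3)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for step in range(1000):
        batch = np.random.rand(64, 784).astype(np.float32)  # stand-in data
        _, losses = sess.run([model.train_op, model.losses],
                             feed_dict={model.x: batch})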
def createNetworkWithInitializer(self, X, iStep, weightInit, biasInit,
                                 renormalizeFactor):
    with tf.compat.v1.variable_scope("NetWorkUZ" + str(iStep),
                                     reuse=tf.compat.v1.AUTO_REUSE):
        cMinW = 0
        cMinB = 0
        fPrev = fc(X, int(self.layerSize[0]), scope='enc_fc1',
                   activation_fn=self.activation,
                   weights_initializer=tf.constant_initializer(
                       np.reshape(weightInit[:self.d * self.layerSize[0]],
                                  [self.d, self.layerSize[0]])),
                   biases_initializer=tf.constant_initializer(
                       biasInit[0:self.layerSize[0]]),
                   trainable=True)
        cMinW += self.d * self.layerSize[0]
        cMinB += self.layerSize[0]
        for i in np.arange(len(self.layerSize) - 1):
            scopeName = 'enc_fc' + str(i + 2)
            f = fc(fPrev, int(self.layerSize[i + 1]), scope=scopeName,
                   activation_fn=self.activation,
                   weights_initializer=tf.constant_initializer(
                       np.reshape(
                           weightInit[cMinW:cMinW + self.layerSize[i] * self.layerSize[i + 1]],
                           [self.layerSize[i], self.layerSize[i + 1]])),
                   biases_initializer=tf.constant_initializer(
                       biasInit[cMinB:cMinB + self.layerSize[i + 1]]),
                   trainable=True)
            cMinW += self.layerSize[i] * self.layerSize[i + 1]
            cMinB += self.layerSize[i + 1]
            fPrev = f
        UDU = fc(fPrev, self.d + 1, scope='UZ', activation_fn=None,
                 weights_initializer=tf.constant_initializer(
                     np.reshape(
                         weightInit[cMinW:cMinW + self.layerSize[len(self.layerSize) - 1] * (self.d + 1)],
                         [self.layerSize[len(self.layerSize) - 1], self.d + 1])),
                 biases_initializer=tf.constant_initializer(
                     biasInit[cMinB:cMinB + (self.d + 1)]),
                 trainable=True)
    return UDU[:, 0], UDU[:, 1:]
def __build_graph__(self, layers, cond_sz):
    input_sz = layers[0]
    latent_sz = layers[-1] // 2  # integer division so the slices below get int indices

    # encoder (parametrization of the approximate posterior q(z|x))
    x = tf.placeholder(tf.float32, [None, input_sz])   # input layer
    y = tf.placeholder(tf.float32, [None, cond_sz])    # condition layer
    with tf.variable_scope('encoder', reuse=False):
        fc_x = tf.concat([x, y], axis=1)
        for hidden in layers[1:-1]:                     # hidden layers
            fc_x = fc(fc_x, hidden)
        z_param = fc(fc_x, latent_sz * 2, activation_fn=None)
        z_log_sigma_sq = z_param[:, :latent_sz]         # log variance of q(z|x)
        z_mu = z_param[:, latent_sz:]                   # mean of q(z|x)

    # sample the latent variable z from q(z|x)
    eps = tf.random_normal(shape=tf.shape(z_log_sigma_sq))
    z = tf.sqrt(tf.exp(z_log_sigma_sq)) * eps + z_mu

    # decoder (parametrization of the likelihood p(x|z)),
    # mirroring the encoder structure
    with tf.variable_scope('decoder', reuse=False):
        fc_z = tf.concat([z, y], axis=1)
        for hidden in layers[::-1][1:-1]:               # hidden layers
            fc_z = fc(fc_z, hidden)
        x_hat = fc(fc_z, input_sz, activation_fn=tf.sigmoid)  # reconstruction layer

    # loss: negative of the Evidence Lower BOund (ELBO)
    # 1. KL divergence KL(q(z|x) || p(z))
    #    (closed form between two multivariate normal distributions)
    kl_loss = -tf.reduce_mean(0.5 * tf.reduce_sum(
        1 + z_log_sigma_sq - tf.square(z_mu) - tf.exp(z_log_sigma_sq), axis=1))
    # 2. likelihood p(x|z), i.e. the reconstruction loss,
    #    parametrized as binary cross-entropy since MNIST contains binary images
    eps = 1e-10  # small constant to avoid log(0.0)
    recon_loss = tf.reduce_mean(-tf.reduce_sum(
        x * tf.log(eps + x_hat) + (1 - x) * tf.log(1 - x_hat + eps), axis=1))
    total_loss = kl_loss + 3 * recon_loss

    # record variables
    self.z = z
    self.total_loss, self.recon_loss, self.kl_loss = total_loss, recon_loss, kl_loss
    self.x, self.y, self.x_hat = x, y, x_hat
def build_encoder(batch_size, inputenc, name="encoder"):
    with tf.variable_scope(name):
        # Encode
        # x -> z_mean, z_sigma -> z
        # inputenc = tf.reshape(inputenc, [-1, inputenc.get_shape().as_list()[0]])
        f1 = fc(inputenc, 256, scope='enc_fc1', activation_fn=tf.nn.relu)
        f2 = fc(f1, 128, scope='enc_fc2', activation_fn=tf.nn.relu)
        f3 = fc(f2, 64, scope='enc_fc3', activation_fn=tf.nn.relu)
        z_mu = fc(f3, n_z, scope='enc_fc4_mu', activation_fn=None)
        z_log_sigma_sq = fc(f3, n_z, scope='enc_fc4_sigma', activation_fn=None)
        eps = tf.random_normal(shape=tf.shape(z_log_sigma_sq),
                               mean=0, stddev=1, dtype=tf.float32)
        z = z_mu + tf.sqrt(tf.exp(z_log_sigma_sq)) * eps
        return f2, z
def build(self):
    self.x = tf.placeholder(name='x', dtype=tf.float32,
                            shape=[None, self.input_dim])
    f1 = fc(self.x, 10, scope='fc1', activation_fn=tf.nn.elu)
    f2 = fc(f1, 10, scope='fc2', activation_fn=tf.nn.elu)
    self.strategy = fc(f2, self.n, scope='fc3', activation_fn=tf.nn.sigmoid)
    self.strategy = self.strategy / tf.reduce_sum(self.strategy)
    self.u = self.utility(self.x, self.strategy, self.input_dim)

    # Loss
    self.train_op = tf.train.AdamOptimizer(
        learning_rate=self.learning_rate).minimize(self.u)
    return
def createNetwork(self, X, iStep, renormalizeFactor):
    with tf.compat.v1.variable_scope("NetWorkGam" + str(iStep),
                                     reuse=tf.compat.v1.AUTO_REUSE):
        fPrev = fc(X, int(self.layerSize[0]), scope='enc_fc1',
                   activation_fn=self.activation)
        for i in np.arange(len(self.layerSize) - 1):
            scopeName = 'enc_fc' + str(i + 2)
            f = fc(fPrev, int(self.layerSize[i + 1]), scope=scopeName,
                   activation_fn=self.activation)
            fPrev = f
        # D2U -> d^2
        sizeFin = int(self.d * self.d)
        ZGam = fc(fPrev, sizeFin, scope='Gam', activation_fn=None)
    return tf.reshape(ZGam, [tf.shape(X)[0], self.d, self.d])
def createNetworkWithInitializer(self, X, iStep, weightInit, biasInit,
                                 renormalizeFactor):
    with tf.compat.v1.variable_scope("NetWork" + str(iStep),
                                     reuse=tf.compat.v1.AUTO_REUSE):
        cMinW = 0
        cMinB = 0
        fPrev = fc(X, int(self.layerSize[0]), scope='enc_fc1',
                   activation_fn=self.activation,
                   weights_initializer=tf.constant_initializer(
                       np.reshape(weightInit[:self.d * int(self.layerSize[0])],
                                  [self.d, int(self.layerSize[0])])),
                   biases_initializer=tf.constant_initializer(
                       biasInit[0:int(self.layerSize[0])]))
        cMinW += self.d * int(self.layerSize[0])
        cMinB += int(self.layerSize[0])
        for i in np.arange(len(self.layerSize) - 1):
            scopeName = 'enc_fc' + str(i + 2)
            f = fc(fPrev, int(self.layerSize[i + 1]), scope=scopeName,
                   activation_fn=self.activation,
                   weights_initializer=tf.constant_initializer(
                       np.reshape(
                           weightInit[cMinW:cMinW + int(self.layerSize[i]) * int(self.layerSize[i + 1])],
                           [int(self.layerSize[i]), int(self.layerSize[i + 1])])),
                   biases_initializer=tf.constant_initializer(
                       biasInit[cMinB:cMinB + int(self.layerSize[i + 1])]))
            cMinW += int(self.layerSize[i]) * int(self.layerSize[i + 1])
            cMinB += int(self.layerSize[i + 1])
            fPrev = f
        U = fc(fPrev, 1, scope='U', activation_fn=None,
               weights_initializer=tf.constant_initializer(
                   np.reshape(weightInit[cMinW:cMinW + int(self.layerSize[len(self.layerSize) - 1])],
                              [int(self.layerSize[len(self.layerSize) - 1]), 1])),
               biases_initializer=tf.constant_initializer(biasInit[cMinB:cMinB + 1]))
        DU = tf.gradients(U, X)
    return U[:, 0], DU[0] / renormalizeFactor
def decoder(latent_var, hidden_dim, n_layers, activation, drop_rate, is_training):
    """
    decoder function

    :param latent_var: latent space sample
    :param hidden_dim: number of nodes in hidden layers
    :param n_layers: number of hidden layers
    :param activation: activation function
    :param drop_rate: dropout rate
    :param is_training: apply dropout only during network training
    :returns: last hidden layer
    """
    hidden_dec = []
    hidden_dec_bn = []
    for i in range(n_layers):
        if i == 0:
            hidden_dec.append(
                fc(latent_var, hidden_dim, scope="hidden_dec%i" % i))
            hidden_dec_bn.append(
                tf.layers.batch_normalization(hidden_dec[i],
                                              name="hidden_dec%i_bn" % i))
            if drop_rate > 0:
                hidden_dec_bn.append(
                    tf.layers.dropout(hidden_dec_bn[i],
                                      rate=drop_rate,
                                      name="hidden_dec%i_dp" % i,
                                      training=is_training))
            else:
                hidden_dec_bn.append(hidden_dec_bn[i])
        else:
            hidden_dec.append(
                fc(hidden_dec_bn[i], hidden_dim, scope="hidden_dec%i" % i))
            hidden_dec_bn.append(
                tf.layers.batch_normalization(hidden_dec[i],
                                              name="hidden_dec%i_bn" % i))
            if drop_rate > 0:
                hidden_dec_bn.append(
                    tf.layers.dropout(hidden_dec_bn[i + 1],
                                      rate=drop_rate,
                                      name="hidden_dec%i_dp" % i,
                                      training=is_training))
    return hidden_dec_bn[-1]
def encoder(batch, hidden_dim, n_layers, activation, drop_rate, is_training):
    """
    encoder function

    :param batch: normalized data batch
    :param hidden_dim: number of nodes in hidden layers
    :param n_layers: number of hidden layers
    :param activation: activation function
    :param drop_rate: dropout rate
    :param is_training: apply dropout only during network training
    :returns: last hidden layer
    """
    hidden_enc = []
    hidden_enc_bn = []
    for i in range(n_layers):
        if i == 0:
            hidden_enc.append(fc(batch, hidden_dim, scope="hidden_in%i" % i))
            hidden_enc_bn.append(
                tf.layers.batch_normalization(hidden_enc[i],
                                              name="hidden_in%i_bn" % i))
            if drop_rate > 0:
                hidden_enc_bn.append(
                    tf.layers.dropout(hidden_enc_bn[i],
                                      rate=drop_rate,
                                      name="hidden_in%i_dp" % i,
                                      training=is_training))
            else:
                hidden_enc_bn.append(hidden_enc_bn[i])
        else:
            hidden_enc.append(
                fc(hidden_enc_bn[i], hidden_dim, scope="hidden_in%i" % i))
            hidden_enc_bn.append(
                tf.layers.batch_normalization(hidden_enc[i],
                                              name="hidden_in%i_bn" % i))
            if drop_rate > 0:
                hidden_enc_bn.append(
                    tf.layers.dropout(hidden_enc_bn[i + 1],
                                      rate=drop_rate,
                                      name="hidden_in%i_dp" % i,
                                      training=is_training))
    return hidden_enc_bn[-1]
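# A minimal sketch of how the encoder/decoder helpers above could be wired into
# an autoencoder (the input size 784, the latent size 32 and the two output
# heads are assumptions, not taken from the original code):
x = tf.placeholder(tf.float32, [None, 784], name='x')
is_training = tf.placeholder(tf.bool, name='is_training')
h_enc = encoder(x, hidden_dim=128, n_layers=2, activation=tf.nn.relu,
                drop_rate=0.1, is_training=is_training)
z = fc(h_enc, 32, scope='latent', activation_fn=None)
h_dec = decoder(z, hidden_dim=128, n_layers=2, activation=tf.nn.relu,
                drop_rate=0.1, is_training=is_training)
x_hat = fc(h_dec, 784, scope='recon', activation_fn=tf.sigmoid)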
def discriminator(self, x, reuse=False):
    with tf.variable_scope('discriminator', reuse=reuse) as scope:  # alternatively reuse=tf.AUTO_REUSE
        # if reuse:
        #     scope.reuse_variables()
        '''
        w1 = tf.Variable(tf.random_normal(shape=[x.get_shape()[1], self.d_hidden_size], dtype=tf.float32))
        b1 = tf.Variable(tf.zeros([self.d_hidden_size], dtype=tf.float32))
        w2 = tf.Variable(tf.random_normal(shape=[self.d_hidden_size, 1], dtype=tf.float32))
        b2 = tf.Variable(tf.zeros([1], dtype=tf.float32))
        h1 = tf.nn.relu(tf.matmul(x, w1) + b1)
        h2 = tf.matmul(h1, w2) + b2
        h2_act = tf.nn.sigmoid(h2)
        '''
        d1 = fc(x, self.d_hidden_size, scope='dis_fc1', activation_fn=tf.nn.relu)
        d_log = fc(d1, 1, scope='dis_fc2', activation_fn=None)
        d2 = tf.nn.sigmoid(d_log)
        return d_log, d2
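# A hedged sketch of how the generator/discriminator above are typically wired
# into the standard GAN losses (the tensors x_real and z, the `model` instance
# and this particular loss choice are assumptions, not part of the original class):
d_logit_real, _ = model.discriminator(x_real)
g_logit, x_fake = model.generator(z)
d_logit_fake, _ = model.discriminator(x_fake, reuse=True)

d_loss = tf.reduce_mean(
    tf.nn.sigmoid_cross_entropy_with_logits(logits=d_logit_real,
                                            labels=tf.ones_like(d_logit_real)) +
    tf.nn.sigmoid_cross_entropy_with_logits(logits=d_logit_fake,
                                            labels=tf.zeros_like(d_logit_fake)))
g_loss = tf.reduce_mean(
    tf.nn.sigmoid_cross_entropy_with_logits(logits=d_logit_fake,
                                            labels=tf.ones_like(d_logit_fake)))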
def build(self, input_dim):
    self.x = tf.placeholder(name='x', dtype=tf.float32, shape=[None, input_dim])

    # Encode
    # x -> z_mean, z_sigma -> z
    f1 = fc(self.x, self.hidden_layers[0], scope='ae_enc_fc1',
            activation_fn=tf.nn.relu)
    # f2 = fc(f1, 60, scope='enc_fc2', activation_fn=tf.nn.tanh)
    f3 = fc(f1, self.hidden_layers[1], scope='ae_enc_fc3',
            activation_fn=tf.nn.relu)
    # f4 = fc(f3, 20, scope='enc_fc4', activation_fn=tf.nn.relu)
    self.z = fc(f3, self.hidden_layers[2], scope='ae_enc_fc5_mu',
                activation_fn=None)

    # Decode
    # z, y -> x_hat
    # g1 = fc(self.Z, 20, scope='dec_fc1', activation_fn=tf.nn.relu)
    g2 = fc(self.z, self.hidden_layers[1], scope='ae_dec_fc2',
            activation_fn=tf.nn.relu)
    g3 = fc(g2, self.hidden_layers[0], scope='ae_dec_fc3',
            activation_fn=tf.nn.relu)
    # g4 = fc(g3, 85, scope='dec_fc4', activation_fn=tf.nn.tanh)
    self.x_hat = fc(g3, input_dim, scope='ae_dec_fc5', activation_fn=tf.sigmoid)
    # self.x_res = self.x_hat[:, 0:input_dim]

    # Loss
    # Reconstruction loss: mean-squared error per example
    recon_loss = tf.reduce_mean(tf.square(self.x - self.x_hat), 1)  # (((self.x - y)**2).mean(1)).mean()
    # Alternative: cross-entropy loss
    # H(x, x_hat) = -\Sigma x*log(x_hat) + (1-x)*log(1-x_hat)
    # epsilon = 1e-10
    # recon_loss = -tf.reduce_sum(
    #     self.x * tf.log(epsilon + self.x_hat) +
    #     (1 - self.x) * tf.log(epsilon + 1 - self.x_hat),
    #     axis=1)
    self.recon_loss = tf.reduce_mean(recon_loss)

    self.train_op = tf.train.AdamOptimizer(
        learning_rate=self.learning_rate).minimize(self.recon_loss)
    return
def build(self):
    self.x = tf.placeholder(name='x', dtype=tf.float32, shape=[None, input_dim])

    # Encode
    # x -> z_mean, z_sigma -> z
    # input 28*28 = 784
    f1 = fc(self.x, 512, scope='enc_fc1', activation_fn=tf.nn.elu)   # fully connected 512
    f2 = fc(f1, 384, scope='enc_fc2', activation_fn=tf.nn.elu)       # fully connected 384
    f3 = fc(f2, 256, scope='enc_fc3', activation_fn=tf.nn.elu)       # fully connected 256
    self.z_mu = fc(f3, self.n_z, scope='enc_fc4_mu',
                   activation_fn=None)                               # fully connected to mu (default 10)
    self.z_log_sigma_sq = fc(f3, self.n_z, scope='enc_fc4_sigma',
                             activation_fn=None)                     # fully connected to sigma (default 10)
    eps = tf.random_normal(shape=tf.shape(self.z_log_sigma_sq),      # reparameterization trick
                           mean=0, stddev=1, dtype=tf.float32)
    self.z = self.z_mu + tf.sqrt(tf.exp(self.z_log_sigma_sq)) * eps  # combine mu with sigma * normal noise

    # Decode
    # z -> x_hat
    g1 = fc(self.z, 256, scope='dec_fc1', activation_fn=tf.nn.elu)   # fully connected from n_z to 256
    g2 = fc(g1, 384, scope='dec_fc2', activation_fn=tf.nn.elu)       # fully connected to 384
    g3 = fc(g2, 512, scope='dec_fc3', activation_fn=tf.nn.elu)       # fully connected to 512
    self.x_hat = fc(g3, input_dim, scope='dec_fc4',
                    activation_fn=tf.sigmoid)                        # fully connected to 784

    # Loss
    # Reconstruction loss: minimize the cross-entropy loss
    # H(x, x_hat) = -\Sigma x*log(x_hat) + (1-x)*log(1-x_hat)
    epsilon = 1e-10
    recon_loss = -tf.reduce_sum(
        self.x * tf.log(epsilon + self.x_hat) +
        (1 - self.x) * tf.log(epsilon + 1 - self.x_hat),
        axis=1)
    self.recon_loss = tf.reduce_mean(recon_loss)

    # Latent loss
    # Kullback-Leibler divergence between the latent distribution and N(0, 1)
    latent_loss = -0.5 * tf.reduce_sum(
        1 + self.z_log_sigma_sq - tf.square(self.z_mu) - tf.exp(self.z_log_sigma_sq),
        axis=1)
    self.latent_loss = tf.reduce_mean(latent_loss)

    self.total_loss = tf.reduce_mean(recon_loss + latent_loss)
    self.train_op = tf.train.AdamOptimizer(
        learning_rate=self.learning_rate).minimize(self.total_loss)
    return
def createNetwork(self, X, iStep, renormalizeFactor):
    with tf.compat.v1.variable_scope("NetWorkUZ" + str(iStep),
                                     reuse=tf.compat.v1.AUTO_REUSE):
        fPrev = fc(X, int(self.layerSize[0]), scope='enc_fc1',
                   activation_fn=self.activation)
        for i in np.arange(len(self.layerSize) - 1):
            scopeName = 'enc_fc' + str(i + 2)
            f = fc(fPrev, int(self.layerSize[i + 1]), scope=scopeName,
                   activation_fn=self.activation)
            fPrev = f
        UZ = fc(fPrev, self.d + 1, scope='UZ', activation_fn=None)
        Gam = []
        for id in range(self.d):
            Gam.append(tf.gradients(UZ[:, 1 + id], X)[0] / renormalizeFactor)
        Gam = tf.concat(Gam, axis=1)
    return UZ[:, 0], UZ[:, 1:], tf.reshape(Gam, [tf.shape(X)[0], self.d, self.d])
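Read alongside the snippet above (an interpretation of the code, not a statement from the original source): the head `UZ` holds a scalar value u(x) in column 0 and d further components z(x), apparently standing for the gradient Du, in the remaining columns; each `tf.gradients` call then differentiates one component of z with respect to the input, so the reshaped `Gam` plays the role of a rescaled Jacobian of z, i.e. a Hessian-like matrix of u:

\[
\texttt{Gam}[b, i, j] \;=\; \frac{1}{\texttt{renormalizeFactor}}\,\frac{\partial z_i}{\partial x_j}(x_b)
\;\approx\; \frac{\partial^2 u}{\partial x_i\,\partial x_j}(x_b).
\]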
def build(self):
    self.x = tf.placeholder(name='x', dtype=tf.float32, shape=[None, input_dim])

    # Encode
    # x -> z_mean, z_sigma -> z
    f1 = fc(self.x, self.hidden_layers[0], scope='vae_enc_fc1',
            activation_fn=tf.nn.relu)
    # f2 = fc(f1, 60, scope='enc_fc2', activation_fn=tf.nn.tanh)
    f3 = fc(f1, self.hidden_layers[1], scope='vae_enc_fc3',
            activation_fn=tf.nn.relu)
    # f4 = fc(f3, 20, scope='enc_fc4', activation_fn=tf.nn.relu)
    self.z_mu = fc(f3, self.hidden_layers[2], scope='vae_enc_fc5_mu',
                   activation_fn=None)
    self.z_log_sigma_sq = fc(f3, self.hidden_layers[2], scope='vae_enc_fc5_sigma',
                             activation_fn=None)
    eps = tf.random_normal(shape=tf.shape(self.z_log_sigma_sq),
                           mean=0, stddev=1, dtype=tf.float32)
    self.z = self.z_mu + tf.sqrt(tf.exp(self.z_log_sigma_sq)) * eps

    # Decode
    # z, y -> x_hat
    # g1 = fc(self.Z, 20, scope='dec_fc1', activation_fn=tf.nn.relu)
    g2 = fc(self.z, self.hidden_layers[1], scope='vae_dec_fc2',
            activation_fn=tf.nn.relu)
    g3 = fc(g2, self.hidden_layers[0], scope='vae_dec_fc3',
            activation_fn=tf.nn.relu)
    # g4 = fc(g3, 85, scope='dec_fc4', activation_fn=tf.nn.tanh)
    self.x_hat = fc(g3, input_dim, scope='vae_dec_fc5', activation_fn=tf.sigmoid)
    # self.x_res = self.x_hat[:, 0:input_dim]

    # Loss
    # Reconstruction loss: mean-squared error per example
    recon_loss = tf.reduce_mean(tf.square(self.x - self.x_hat), 1)  # (((self.x - y)**2).mean(1)).mean()
    self.recon_loss = tf.reduce_mean(recon_loss)

    # Latent loss
    # Kullback-Leibler divergence between the latent distribution and N(0, 1)
    latent_loss = -0.5 * tf.reduce_sum(
        1 + self.z_log_sigma_sq - tf.square(self.z_mu) - tf.exp(self.z_log_sigma_sq),
        axis=1)
    self.latent_loss = tf.reduce_mean(latent_loss)

    self.total_loss = tf.reduce_mean(recon_loss + latent_loss)
    self.train_op = tf.train.AdamOptimizer(
        learning_rate=self.learning_rate).minimize(self.total_loss)
    return
def build_network(batch_size, inputenc, name="whole_network"):
    with tf.variable_scope(name):
        # Encode
        # x -> z_mean, z_sigma -> z
        f1 = fc(inputenc, 256, scope='enc_fc1', activation_fn=tf.nn.relu)
        f2 = fc(f1, 128, scope='enc_fc2', activation_fn=tf.nn.relu)
        f3 = fc(f2, 64, scope='enc_fc3', activation_fn=tf.nn.relu)
        z_mu = fc(f3, n_z, scope='enc_fc4_mu', activation_fn=None)
        z_log_sigma_sq = fc(f3, n_z, scope='enc_fc4_sigma', activation_fn=None)
        eps = tf.random_normal(shape=tf.shape(z_log_sigma_sq),
                               mean=0, stddev=1, dtype=tf.float32)
        z = z_mu + tf.sqrt(tf.exp(z_log_sigma_sq)) * eps

        # Decode
        g1 = fc(z, 64, scope='dec_fc1', activation_fn=tf.nn.relu)
        g2 = fc(g1, 128, scope='dec_fc2', activation_fn=tf.nn.relu)
        g3 = fc(g2, 256, scope='dec_fc3', activation_fn=tf.nn.relu)
        x_hat = fc(g3, input_dim, scope='dec_fc4', activation_fn=tf.sigmoid)
        return f2, g2, x_hat, z_mu, z_log_sigma_sq
def build(self):
    # input
    self.x = tf.placeholder(name='x', dtype=tf.float32, shape=[None, input_dim])

    # encoder
    # slim.fc(input, output_dim, scope, act_fn)
    f1 = fc(self.x, 512, scope='enc_fc1', activation_fn=tf.nn.elu)
    f2 = fc(f1, 384, scope='enc_fc2', activation_fn=tf.nn.elu)
    f3 = fc(f2, 256, scope='enc_fc3', activation_fn=tf.nn.elu)
    self.z_mu = fc(f3, self.n_z, scope='enc_fc4_mu', activation_fn=None)
    # log(sigma^2); needs its own scope, distinct from the mu head
    self.z_log_sigma_sq = fc(f3, self.n_z, scope='enc_fc4_sigma', activation_fn=None)

    # N(z_mu, z_sigma)
    eps = tf.random_normal(shape=tf.shape(self.z_log_sigma_sq),
                           mean=0, stddev=1, dtype=tf.float32)
    self.z = self.z_mu + tf.sqrt(tf.exp(self.z_log_sigma_sq)) * eps

    # decoder
    g1 = fc(self.z, 256, scope='dec_fc1', activation_fn=tf.nn.elu)
    g2 = fc(g1, 384, scope='dec_fc2', activation_fn=tf.nn.elu)
    g3 = fc(g2, 512, scope='dec_fc3', activation_fn=tf.nn.elu)
    self.x_hat = fc(g3, input_dim, scope='dec_fc4', activation_fn=tf.sigmoid)

    # losses
    # reconstruction loss, x <-> x_hat
    # H(x, x_hat) = -\Sigma x*log(x_hat) + (1-x)*log(1-x_hat)
    epsilon = 1e-10
    recon_loss = -tf.reduce_sum(
        self.x * tf.log(self.x_hat + epsilon) +
        (1 - self.x) * tf.log(1 - self.x_hat + epsilon))

    # latent loss
    # KL divergence between the latent distribution and N(0, 1)
    latent_loss = -0.5 * tf.reduce_sum(
        1 + self.z_log_sigma_sq - tf.square(self.z_mu) - tf.exp(self.z_log_sigma_sq),
        axis=1)
def build(self, input_dim):
    self.x = tf.placeholder(name='x', dtype=tf.float32, shape=[None, input_dim])
    Xnoise = self.x + self.noise_factor * tf.random_normal(tf.shape(self.x))
    Xnoise = tf.clip_by_value(Xnoise, 0., 1.)

    # Encode
    f1 = fc(Xnoise, self.hidden_layers[0], scope='dae_enc_fc1',
            activation_fn=tf.nn.relu)
    f2 = fc(f1, self.hidden_layers[1], scope='dae_enc_fc2',
            activation_fn=tf.nn.relu)
    self.z = fc(f2, self.hidden_layers[2], scope='dae_enc_fc3_mu',
                activation_fn=None)

    # Decode
    g1 = fc(self.z, self.hidden_layers[1], scope='dae_dec_fc2',
            activation_fn=tf.nn.relu)
    g2 = fc(g1, self.hidden_layers[0], scope='dae_dec_fc1',
            activation_fn=tf.nn.relu)
    self.x_hat = fc(g2, input_dim, scope='dae_dec_xhat',
                    activation_fn=tf.nn.sigmoid)

    # Reconstruction loss: mean-squared error against the clean input
    recon_loss = tf.reduce_mean(tf.square(self.x - self.x_hat), 1)
    self.recon_loss = tf.reduce_mean(recon_loss)
    self.train_op = tf.train.AdamOptimizer(
        learning_rate=self.learning_rate).minimize(self.recon_loss)
    return
def ae_network(self):
    """
    The main structure of the autoencoder network.
    :return:
    """
    input_dim = self.x.get_shape()
    self.x = tf.reshape(self.x, [input_dim[0], input_dim[1]])
    input_dim = self.x.get_shape().as_list()[1]
    print("input_dim:", input_dim)

    # Encode
    # x -> z
    f1 = fc(self.x, 256, scope='enc_fc1', activation_fn=tf.nn.relu)
    f3 = fc(f1, 64, scope='enc_fc3', activation_fn=tf.nn.relu)
    z = fc(f3, self.nz, scope='enc_fc4', activation_fn=tf.nn.relu)

    # Decode
    # z -> x_hat
    g1 = fc(z, 64, scope='dec_fc1', activation_fn=tf.nn.relu)
    g3 = fc(g1, 256, scope='dec_fc3', activation_fn=tf.nn.relu)
    self.x_hat = fc(g3, input_dim, scope='dec_fc4', activation_fn=tf.sigmoid)
    print("build graph done!")
def build(self):
    self.x = tf.placeholder(name='x', dtype=tf.float32, shape=[None, input_dim])

    # Encode
    # x -> z_mean, z_sigma -> z
    f0 = fc(self.x, 4096, scope='enc_fc0', activation_fn=tf.nn.elu,
            weights_initializer=tf.truncated_normal_initializer(stddev=0.01),
            weights_regularizer=slim.l2_regularizer(0.05))
    f1 = fc(f0, 2048, scope='enc_fc1', activation_fn=tf.nn.elu,
            weights_initializer=tf.truncated_normal_initializer(stddev=0.01),
            weights_regularizer=slim.l2_regularizer(0.05))
    f2 = fc(f1, 1024, scope='enc_fc2', activation_fn=tf.nn.elu,
            weights_initializer=tf.truncated_normal_initializer(stddev=0.01),
            weights_regularizer=slim.l2_regularizer(0.05))
    f3 = fc(f2, 512, scope='enc_fc3', activation_fn=tf.nn.elu,
            weights_initializer=tf.truncated_normal_initializer(stddev=0.01),
            weights_regularizer=slim.l2_regularizer(0.05))
    self.z_mu = fc(f3, self.n_z, scope='enc_fc4_mu', activation_fn=None)
    self.z_log_sigma_sq = fc(f3, self.n_z, scope='enc_fc4_sigma', activation_fn=None)
    eps = tf.random_normal(shape=tf.shape(self.z_log_sigma_sq),
                           mean=0, stddev=1, dtype=tf.float32)
    self.z = self.z_mu + tf.sqrt(tf.exp(self.z_log_sigma_sq)) * eps

    # Decode
    # z -> x_hat
    g0 = fc(self.z, 512, scope='dec_fc0', activation_fn=tf.nn.elu,
            weights_initializer=tf.truncated_normal_initializer(stddev=0.01),
            weights_regularizer=slim.l2_regularizer(0.05))
    g1 = fc(g0, 1048, scope='dec_fc1', activation_fn=tf.nn.elu,
            weights_initializer=tf.truncated_normal_initializer(stddev=0.01),
            weights_regularizer=slim.l2_regularizer(0.05))
    g2 = fc(g1, 2048, scope='dec_fc2', activation_fn=tf.nn.elu,
            weights_initializer=tf.truncated_normal_initializer(stddev=0.01),
            weights_regularizer=slim.l2_regularizer(0.05))
    g3 = fc(g2, 4096, scope='dec_fc3', activation_fn=tf.nn.elu,
            weights_initializer=tf.truncated_normal_initializer(stddev=0.01),
            weights_regularizer=slim.l2_regularizer(0.05))
    self.x_hat = fc(g3, input_dim, scope='dec_fc4', activation_fn=tf.sigmoid,
                    weights_initializer=tf.truncated_normal_initializer(stddev=0.01),
                    weights_regularizer=slim.l2_regularizer(0.05))

    # Loss
    # Reconstruction loss: minimize the cross-entropy loss
    # H(x, x_hat) = -\Sigma x*log(x_hat) + (1-x)*log(1-x_hat)
    epsilon = 1e-10
    recon_loss = -tf.reduce_sum(
        self.x * tf.log(epsilon + self.x_hat) +
        (1 - self.x) * tf.log(epsilon + 1 - self.x_hat),
        axis=1)
    self.recon_loss = tf.reduce_mean(recon_loss)

    # Latent loss
    # Kullback-Leibler divergence between the latent distribution and N(0, 1)
    latent_loss = -0.5 * tf.reduce_sum(
        1 + self.z_log_sigma_sq - tf.square(self.z_mu) - tf.exp(self.z_log_sigma_sq),
        axis=1)
    self.latent_loss = tf.reduce_mean(latent_loss)

    self.total_loss = tf.reduce_mean(recon_loss + latent_loss)
    self.train_op = tf.train.AdamOptimizer(
        learning_rate=self.learning_rate).minimize(self.total_loss)
    return
def build(self):
    self.x = tf.placeholder(name='x', dtype=tf.float32, shape=[None, self.vindim])

    # Encode
    # x -> z_mean, z_sigma -> z
    f0 = fc(self.x, 30000, scope='enc_fc0', activation_fn=tf.nn.relu)
    f1 = fc(f0, 15000, scope='enc_fc1', activation_fn=tf.nn.relu)
    f2 = fc(f1, 10000, scope='enc_fc2', activation_fn=tf.nn.relu)
    f3 = fc(f2, 2000, scope='enc_fc3', activation_fn=tf.nn.relu)
    # f3 = fc(f3, 500, scope='enc_fc3', activation_fn=tf.nn.elu)
    self.z_mu = fc(f3, self.n_z, scope='enc_fc4_mu', activation_fn=None)
    self.z_log_sigma_sq = fc(f3, self.n_z, scope='enc_fc4_sigma', activation_fn=None)
    eps = tf.random_normal(shape=tf.shape(self.z_log_sigma_sq),
                           mean=0, stddev=1, dtype=tf.float32)
    # zzz = self.z_mu + tf.sqrt(tf.exp(self.z_log_sigma_sq)) * eps
    # self.zzz = tf.Print(zzz, [zzz], message="my Z-values:")
    self.z = self.z_mu + tf.sqrt(tf.exp(self.z_log_sigma_sq)) * eps

    # Decode
    # z -> x_hat
    g1 = fc(self.z, 2000, scope='dec_fc1', activation_fn=tf.nn.relu)
    g2 = fc(g1, 10000, scope='dec_fc2', activation_fn=tf.nn.relu)
    g3 = fc(g2, 15000, scope='dec_fc3', activation_fn=tf.nn.relu)
    g4 = fc(g3, 30000, scope='dec_fc4', activation_fn=tf.nn.relu)
    self.x_hat = fc(g4, self.vindim, scope='dec_fc5', activation_fn=tf.sigmoid)

    # Loss
    # Reconstruction loss: minimize the cross-entropy loss
    # H(x, x_hat) = -\Sigma x*log(x_hat) + (1-x)*log(1-x_hat)
    epsilon = 1e-9
    recon_loss = -tf.reduce_sum(
        self.x * tf.log(epsilon + self.x_hat) +
        (1 - self.x) * tf.log(epsilon + (1 - self.x_hat)),
        axis=1)
    # recon_loss = tf.reduce_sum((self.x_hat - self.x) ** 2, axis=1)
    # recon_loss = tf.nn.l2_loss(self.x_hat - self.x)
    self.recon_loss = tf.reduce_mean(recon_loss)

    # Latent loss
    # Kullback-Leibler divergence between the latent distribution and N(0, 1)
    latent_loss = -0.5 * tf.reduce_sum(
        1 + self.z_log_sigma_sq - tf.square(self.z_mu) - tf.exp(self.z_log_sigma_sq),
        axis=1)
    self.latent_loss = tf.reduce_mean(latent_loss)

    self.total_loss = tf.reduce_mean(latent_loss + recon_loss)
    self.train_op = tf.train.RMSPropOptimizer(
        learning_rate=self.learning_rate).minimize(self.total_loss)
    # self.train_op = tf.train.AdamOptimizer(
    #     learning_rate=self.learning_rate).minimize(self.total_loss)
    return
def build(self, input_tensor=None, loss_type='CE', use_conv=False,
          sphere_lat=True, crop=-1, alpha_r=3):
    self.input = tf.placeholder(dtype=tf.float32, shape=[None, self.h, self.w, 1],
                                name="input_")
    self.label = tf.placeholder(dtype=tf.int32, shape=[None, ], name="label_")
    print(self.input.name)

    if crop > 0:
        crops = [tf.random_crop(self.input, size=(self.batch_size, 16, 72, 1),
                                seed=None, name="crop_{0:d}".format(ci))
                 for ci in range(crop)]
        self.input_ = tf.concat(crops, axis=0, name="input_crop")
        self.x = tf.layers.flatten(self.input_, name='x')
        self.input_dim = 16 * 72
    else:
        self.x = tf.layers.flatten(self.input, name='x')

    # Encode
    # x -> z_mean, z_sigma -> z
    for var in tf.global_variables():
        print(var.name, var.get_shape())

    if use_conv:
        if crop > 0:
            f1 = conv2d(self.input_, num_outputs=32, kernel_size=[3, 3])
        else:
            f1 = conv2d(self.input, num_outputs=32, kernel_size=[3, 3])
        m1 = max_pool2d(f1, kernel_size=[1, 2], stride=[1, 2])
        f2 = conv2d(m1, num_outputs=64, kernel_size=[3, 3])
        m2 = max_pool2d(f2, kernel_size=2, stride=2)
        f3 = conv2d(m2, num_outputs=128, kernel_size=[3, 3])
        m3 = max_pool2d(f3, kernel_size=[1, 2], stride=[1, 2])
        f4 = conv2d(m3, num_outputs=256, kernel_size=[3, 3])
        m4 = max_pool2d(f4, kernel_size=2, stride=2)
        f5 = conv2d(m4, num_outputs=128, kernel_size=[3, 3])
        m5 = max_pool2d(f5, kernel_size=2, stride=2)
        f6 = conv2d(m5, num_outputs=256, kernel_size=[3, 3])
        # m6 = max_pool2d(f6, kernel_size=2, stride=2)
        # f7 = conv2d(m5, num_outputs=256, kernel_size=[3, 3])
        ff = tf.reduce_mean(f6, axis=(1, 2))  # global average pooling
    else:
        f1 = fc(self.x, 512, scope='enc_fc1', activation_fn=tf.nn.relu)
        f2 = fc(f1, 256, scope='enc_fc2', activation_fn=tf.nn.relu)
        ff = fc(f2, 128, scope='enc_fc3', activation_fn=tf.nn.relu)
    z = fc(ff, self.n_z, scope='enc_fc4', activation_fn=None)

    if sphere_lat:
        # project the latent code onto the unit sphere
        norm = tf.sqrt(tf.reduce_sum(z * z, 1, keepdims=True))
        self.z = tf.div(z, norm, name='enc_norm')
    else:
        self.z = z
    print(self.z.name)

    if crop > 0:
        z1 = tf.reshape(self.z, (1, crop, self.batch_size, -1))
        z2 = tf.reshape(self.z, (crop, 1, self.batch_size, -1))
        zloss = tf.reduce_mean(1 - tf.reduce_sum(z1 * z2, axis=-1), name='zloss') * 0.1
    else:
        zloss, pos_frac = batch_all_triplet_loss(self.label, self.z,
                                                 margin=0.1, squared=False)
        # zloss = zloss * 0.  # tf.constant(0, dtype=tf.float32)

    # Decode
    # z -> x_hat
    g1 = fc(self.z, 128, scope='dec_fc1', activation_fn=tf.nn.relu)
    g2 = fc(g1, 256, scope='dec_fc2', activation_fn=tf.nn.relu)
    g3 = fc(g2, 512, scope='dec_fc3', activation_fn=tf.nn.relu)
    self.x_hat = fc(g3, self.input_dim, scope='dec_fc4', activation_fn=tf.sigmoid)

    # Loss
    # Reconstruction loss
    # 'CE': minimize the cross-entropy loss
    # H(x, x_hat) = -\Sigma x*log(x_hat) + (1-x)*log(1-x_hat)
    epsilon = 1e-10
    if loss_type == 'CE':
        self.recon_loss = -tf.reduce_sum(
            self.x * tf.log(epsilon + self.x_hat) +
            (1 - self.x) * tf.log(epsilon + 1 - self.x_hat),
            axis=1) * alpha_r
    elif loss_type == 'l2':
        self.recon_loss = tf.sqrt(tf.reduce_mean(
            tf.square(self.x - self.x_hat), axis=1)) * alpha_r
    elif loss_type == 'l1':
        self.recon_loss = tf.reduce_mean(
            tf.abs(self.x - self.x_hat), axis=1) * alpha_r
    # self.target_loss = -tf.reduce_sum(
    #     self.x * tf.log(epsilon + self.x) +
    #     (1 - self.x) * tf.log(epsilon + 1 - self.x),
    #     axis=1)

    recon_loss = tf.reduce_mean(self.recon_loss)
    self.train_op = tf.train.AdamOptimizer(
        learning_rate=self.learning_rate).minimize(recon_loss + zloss)
    self.losses = {
        'recon_loss': recon_loss,
        'zloss': zloss,
    }
    return