def model(): print("building model ...") with tf.variable_scope('train'): print("building model ...") X_pl = tf.placeholder(tf.float32, [None, num_features]) X_expand = tf.expand_dims(X_pl, axis=2) print("X_pl", X_pl.get_shape()) t_pl = tf.placeholder(tf.int32, [None,]) print("t_pl", t_pl.get_shape()) is_training_pl = tf.placeholder(tf.bool) cell_fw = tf.nn.rnn_cell.GRUCell(205) cell_bw = tf.nn.rnn_cell.GRUCell(205) seq_len = tf.reduce_sum(tf.ones(tf.shape(X_pl), dtype=tf.int32), axis=1) _, enc_states = tf.nn.bidirectional_dynamic_rnn(cell_fw=cell_fw, cell_bw=cell_bw, inputs=X_expand, sequence_length=seq_len, dtype=tf.float32) enc_states = tf.concat(1, enc_states) enc_states_drop = dropout(enc_states, is_training=is_training_pl) l1 = fully_connected(enc_states_drop, 200, activation_fn=None) l1 = batch_norm(l1, is_training=is_training_pl) l1_relu = relu(l1) l1_dropout = dropout(l1_relu, is_training=is_training_pl) l2 = fully_connected(l1_dropout, 200, activation_fn=None) l2 = batch_norm(l2, is_training=is_training_pl) l2_relu = relu(l2) l_out = fully_connected(l2_relu, num_outputs=num_classes, activation_fn=None) l_out_softmax = tf.nn.softmax(l_out) tf.contrib.layers.summarize_variables() with tf.variable_scope('metrics'): loss = sparse_softmax_cross_entropy_with_logits(l_out, t_pl) print("loss", loss.get_shape()) loss = tf.reduce_mean(loss) print("loss", loss.get_shape()) tf.summary.scalar('train/loss', loss) argmax = tf.to_int32(tf.argmax(l_out, 1)) print("argmax", argmax.get_shape()) correct = tf.to_float(tf.equal(argmax, t_pl)) print("correct,", correct.get_shape()) accuracy = tf.reduce_mean(correct) print("accuracy", accuracy.get_shape()) with tf.variable_scope('optimizer'): print("building optimizer ...") global_step = tf.Variable(0, name='global_step', trainable=False) optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate) grads_and_vars = optimizer.compute_gradients(loss) gradients, variables = zip(*grads_and_vars) clipped_gradients, global_norm = ( tf.clip_by_global_norm(gradients, clip_norm)) clipped_grads_and_vars = zip(clipped_gradients, variables) tf.summary.scalar('train/global_gradient_norm', global_norm) train_op = optimizer.apply_gradients(clipped_grads_and_vars, global_step=global_step) return X_pl, t_pl, is_training_pl, l_out, l_out_softmax, loss, accuracy, train_op, global_step
def build_model(self): sc = predictron_arg_scope() with tf.variable_scope('state'): with slim.arg_scope(sc): state = slim.conv2d(self.inputs, 32, [3, 3], scope='conv1') state = layers.batch_norm(state, activation_fn=tf.nn.relu, scope='conv1/preact') state = slim.conv2d(state, 32, [3, 3], scope='conv2') state = layers.batch_norm(state, activation_fn=tf.nn.relu, scope='conv2/preact') iter_template = tf.make_template('iter', self.iter_func, unique_name_='iter') rewards_arr = [] gammas_arr = [] lambdas_arr = [] values_arr = [] for k in range(self.max_depth): state, reward, gamma, lambda_, value = iter_template(state) rewards_arr.append(reward) gammas_arr.append(gamma) lambdas_arr.append(lambda_) values_arr.append(value) _, _, _, _, value = iter_template(state) # K + 1 elements values_arr.append(value) bs = tf.shape(self.inputs)[0] # [batch_size, K * maze_size] self.rewards = tf.pack(rewards_arr, axis=1) # [batch_size, K, maze_size] self.rewards = tf.reshape(self.rewards, [bs, self.max_depth, self.maze_size]) # [batch_size, K + 1, maze_size] self.rewards = tf.concat_v2(values=[tf.zeros(shape=[bs, 1, self.maze_size], dtype=tf.float32), self.rewards], axis=1, name='rewards') # [batch_size, K * maze_size] self.gammas = tf.pack(gammas_arr, axis=1) # [batch_size, K, maze_size] self.gammas = tf.reshape(self.gammas, [bs, self.max_depth, self.maze_size]) # [batch_size, K + 1, maze_size] self.gammas = tf.concat_v2(values=[tf.ones(shape=[bs, 1, self.maze_size], dtype=tf.float32), self.gammas], axis=1, name='gammas') # [batch_size, K * maze_size] self.lambdas = tf.pack(lambdas_arr, axis=1) # [batch_size, K, maze_size] self.lambdas = tf.reshape(self.lambdas, [-1, self.max_depth, self.maze_size]) # [batch_size, (K + 1) * maze_size] self.values = tf.pack(values_arr, axis=1) # [batch_size, K + 1, maze_size] self.values = tf.reshape(self.values, [-1, (self.max_depth + 1), self.maze_size]) self.build_preturns() self.build_lambda_preturns()
def mlp_w_bn_relu(x, weights, biases, dropout_rate, is_training):
    layer1 = tf.add(tf.matmul(x, weights['h1']), biases['b1'])
    layer1 = layers.batch_norm(layer1, center=True, scale=True, is_training=is_training)
    layer1 = tf.nn.relu(layer1)
    layer2 = tf.add(tf.matmul(layer1, weights['h2']), biases['b2'])
    layer2 = layers.batch_norm(layer2, center=True, scale=True, is_training=is_training)
    layer2 = tf.nn.relu(layer2)
    out_layer = tf.matmul(layer2, weights['out']) + biases['out']  # with linear activation
    return out_layer
def Batch_Normalization(x, training, scope):
    with arg_scope([batch_norm],
                   scope=scope,
                   updates_collections=None,
                   decay=0.9,
                   center=True,
                   scale=True,
                   zero_debias_moving_mean=True):
        return tf.cond(training,
                       lambda: batch_norm(inputs=x, is_training=training, reuse=None),
                       lambda: batch_norm(inputs=x, is_training=training, reuse=True))
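# Hedged usage sketch for the tf.cond-based wrapper above (added for illustration;
# `training_flag`, `images`, and the conv layer are assumptions, not part of the
# original snippet). A single boolean placeholder drives both batch_norm branches,
# so one graph serves training and evaluation.
import tensorflow as tf
from tensorflow.contrib.framework import arg_scope
from tensorflow.contrib.layers import batch_norm, conv2d

training_flag = tf.placeholder(tf.bool, name='training_flag')
images = tf.placeholder(tf.float32, [None, 32, 32, 3], name='images')
net = conv2d(images, 32, [3, 3], activation_fn=None, scope='conv0')
net = Batch_Normalization(net, training=training_flag, scope='bn0')
net = tf.nn.relu(net)
# feed training_flag=True for optimizer steps and False for evaluation runs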
def my_dynamic_rnn(self, inp, mask):
    with tf.variable_scope("my_rnn"):
        self.my_cell = self.create_cell()
        self.output, self.state = tf.nn.dynamic_rnn(cell=self.my_cell,
                                                    inputs=inp,
                                                    dtype=tf.float32)  # (b, s, h)
        self.output = self.output * mask[:, :, None]
        self.output = batch_norm(self.output, is_training=self.is_training,
                                 updates_collections=None)
        return self.output, self.state
def batch_normalize(tensor_in, epsilon=1e-5, convnet=False, decay=0.9,
                    scale_after_normalization=True):
    """Batch normalization.

    Note this is deprecated. Instead, please use contrib.layers.batch_norm. You can
    get is_training via `tf.python.framework.ops.get_collection("IS_TRAINING")`.

    Args:
        tensor_in: input `Tensor`, 4D shape: [batch, in_height, in_width, in_depth].
        epsilon: A float number to avoid being divided by 0.
        convnet: Whether this is for convolutional net use. If `True`, moments
            will sum across axis `[0, 1, 2]`. Otherwise, only `[0]`.
        decay: Decay rate for exponential moving average.
        scale_after_normalization: Whether to scale after normalization.

    Returns:
        A batch-normalized `Tensor`.
    """
    logging.warning("learn.ops.batch_normalize is deprecated, "
                    "please use contrib.layers.batch_norm.")
    is_training = ops.get_collection("IS_TRAINING")
    return batch_norm(tensor_in,
                      is_training=is_training,
                      epsilon=epsilon,
                      decay=decay,
                      scale=scale_after_normalization)
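# Hedged sketch of the replacement the docstring above points to (illustrative only;
# the placeholder shape and names are assumptions): calling contrib.layers.batch_norm
# directly with an explicit is_training flag instead of the deprecated
# learn.ops.batch_normalize wrapper.
import tensorflow as tf
from tensorflow.contrib.layers import batch_norm

feature_map = tf.placeholder(tf.float32, [None, 28, 28, 16], name='feature_map')
is_training = tf.placeholder(tf.bool, name='is_training')
normalized = batch_norm(feature_map, is_training=is_training,
                        epsilon=1e-5, decay=0.9, scale=True)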
def conv_relu(x_in, kernel_shape, train_phase):
    weights = tf.get_variable("weights", kernel_shape,
                              initializer=tf.contrib.layers.xavier_initializer())
    biases = tf.get_variable("biases", kernel_shape[-1],
                             initializer=tf.constant_initializer(0.))
    conv = tf.nn.conv2d(x_in, weights, strides=[1, 1, 1, 1], padding='SAME')
    conv = batch_norm(conv, is_training=train_phase)
    return tf.nn.relu(conv + biases)
def iter_func(self, state): sc = predictron_arg_scope() with tf.variable_scope('value'): value_net = slim.fully_connected(slim.flatten(state), 32, scope='fc0') value_net = layers.batch_norm(value_net, activation_fn=tf.nn.relu, scope='fc0/preact') value_net = slim.fully_connected(value_net, self.maze_size, activation_fn=None, scope='fc1') with slim.arg_scope(sc): net = slim.conv2d(state, 32, [3, 3], scope='conv1') net = layers.batch_norm(net, activation_fn=tf.nn.relu, scope='conv1/preact') net_flatten = slim.flatten(net, scope='conv1/flatten') with tf.variable_scope('reward'): reward_net = slim.fully_connected(net_flatten, 32, scope='fc0') reward_net = layers.batch_norm(reward_net, activation_fn=tf.nn.relu, scope='fc0/preact') reward_net = slim.fully_connected(reward_net, self.maze_size, activation_fn=None, scope='fc1') with tf.variable_scope('gamma'): gamma_net = slim.fully_connected(net_flatten, 32, scope='fc0') gamma_net = layers.batch_norm(gamma_net, activation_fn=tf.nn.relu, scope='fc0/preact') gamma_net = slim.fully_connected(gamma_net, self.maze_size, activation_fn=tf.nn.sigmoid, scope='fc1') with tf.variable_scope('lambda'): lambda_net = slim.fully_connected(net_flatten, 32, scope='fc0') lambda_net = layers.batch_norm(lambda_net, activation_fn=tf.nn.relu, scope='fc0/preact') lambda_net = slim.fully_connected(lambda_net, self.maze_size, activation_fn=tf.nn.sigmoid, scope='fc1') net = slim.conv2d(net, 32, [3, 3], scope='conv2') net = layers.batch_norm(net, activation_fn=tf.nn.relu, scope='conv2/preact') net = slim.conv2d(net, 32, [3, 3], scope='conv3') net = layers.batch_norm(net, activation_fn=tf.nn.relu, scope='conv3/preact') return net, reward_net, gamma_net, lambda_net, value_net
def conv_residual(x_in, n_filt, train_phase):
    in_dim = x_in.get_shape().as_list()[3]
    kernel_shape = [3, 3, in_dim, n_filt]
    weights = tf.get_variable("weights", kernel_shape,
                              initializer=tf.contrib.layers.xavier_initializer())
    biases = tf.get_variable("biases", kernel_shape[-1],
                             initializer=tf.constant_initializer(0.))
    conv = tf.nn.conv2d(x_in, weights, strides=[1, 1, 1, 1], padding='SAME')
    conv = batch_norm(conv, decay=0.99, is_training=train_phase)
    return conv
def get_batch_norm(inputs, is_training, scope=None):
    """Do batch normalization"""
    bn_out = layers.batch_norm(inputs=inputs,
                               center=True,
                               scale=True,
                               is_training=is_training,
                               scope=scope)
    return bn_out
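# Hedged training-loop sketch for the wrapper above (added; `x`, `loss`, and the
# optimizer are illustrative assumptions). With the default updates_collections,
# layers.batch_norm registers its moving-mean/variance updates in
# tf.GraphKeys.UPDATE_OPS, so the train op should depend on them.
import tensorflow as tf

x = tf.placeholder(tf.float32, [None, 128])
is_training = tf.placeholder(tf.bool)
h = get_batch_norm(x, is_training, scope='bn_demo')
loss = tf.reduce_mean(tf.square(h))
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(update_ops):
    train_op = tf.train.AdamOptimizer(1e-3).minimize(loss)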
def BN_ReLU(self, net):
    # Batch Normalization and ReLU
    # 'gamma' is not used as the next layer is ReLU
    net = batch_norm(net,
                     center=True,
                     scale=False,
                     activation_fn=tf.nn.relu)
    # net = tf.nn.relu(net)
    # activation summary ??
    self._activation_summary(net)
    return net
def embedding_project_fn(embeddings):
    if FLAGS.embedding_dim != FLAGS.model_dim:
        # Need to project embeddings to model dimension.
        embeddings = util.Linear(embeddings, FLAGS.model_dim, bias=False)
    if FLAGS.embedding_batch_norm:
        embeddings = layers.batch_norm(embeddings, center=True, scale=True,
                                       is_training=True)
    if FLAGS.embedding_keep_rate < 1.0:
        embeddings = tf.cond(is_training,
                             lambda: tf.nn.dropout(embeddings, FLAGS.embedding_keep_rate),
                             lambda: embeddings / FLAGS.embedding_keep_rate)
    return embeddings
def build_rnn_model(num_timesteps, vocab_size, classifier_fn, is_training, num_classes, train_embeddings=True, initial_embeddings=None): with tf.variable_scope("rnn"): ys = tf.placeholder(tf.int32, (FLAGS.batch_size,), "ys") assert FLAGS.model_dim % 2 == 0, "model_dim must be even; we're using LSTM memory cels which are divided in half" s1_inputs = [tf.placeholder(tf.int32, (FLAGS.batch_size,), "s1_input_%i" % t) for t in range(num_timesteps)] s1_lengths = tf.placeholder(tf.int32, (FLAGS.batch_size,), "s1_lengths") with tf.device("/cpu:0"): embeddings = tf.get_variable("embeddings", (vocab_size, FLAGS.embedding_dim)) s1_embedded = [tf.nn.embedding_lookup(embeddings, s1_input_t) for s1_input_t in s1_inputs] cell = tf.nn.rnn_cell.BasicLSTMCell(FLAGS.model_dim / 2) with tf.variable_scope("s1"): _, s1_state = tf.nn.rnn(cell, s1_embedded, dtype=tf.float32, sequence_length=s1_lengths) mlp_input = s1_state if FLAGS.sentence_repr_batch_norm: mlp_input = layers.batch_norm(mlp_input, center=True, scale=True, is_training=True, scope="sentence_repr_bn") if FLAGS.sentence_repr_keep_rate < 1.0: mlp_input = tf.cond(is_training, lambda: tf.nn.dropout(mlp_input, FLAGS.sentence_repr_keep_rate, name="sentence_repr_dropout"), lambda: mlp_input / FLAGS.sentence_repr_keep_rate) logits = classifier_fn(mlp_input) assert logits.get_shape()[1] == num_classes xent_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, ys) xent_loss = tf.reduce_mean(xent_loss) tf.scalar_summary("xent_loss", xent_loss) rewards = build_rewards(logits, ys) tf.scalar_summary("avg_reward", tf.reduce_mean(rewards)) params = tf.trainable_variables() if not train_embeddings: params.remove(embeddings) l2_loss = tf.add_n([tf.reduce_sum(tf.square(param)) for param in params]) tf.scalar_summary("l2_loss", l2_loss) total_loss = xent_loss + FLAGS.l2_lambda * l2_loss gradients = zip(tf.gradients(total_loss, params), params) return ((s1_inputs, s1_lengths)), logits, ys, gradients
def encoder(inputs): # encoder # 32 x 32 x 32 x 1 -> 16 x 16 x 16 x 32 # 16 x 16 x 16 x 32 -> 8 x 8 x 8 x 16 # 8 x 8 x 8 x 16 -> 2 x 2 x 2 x 8 print("input type and shape", type(inputs), inputs.shape) net = lays.batch_norm(lays.conv3d(inputs, 32, [5, 5, 5], stride=2, padding='SAME', trainable=True), decay=0.9) net = lays.batch_norm(lays.conv3d(net, 16, [5, 5, 5], stride=2, padding='SAME', trainable=True), decay=0.9) net = lays.batch_norm(lays.conv3d(net, 8, [5, 5, 5], stride=4, padding='SAME', trainable=True), decay=0.9) net = lays.batch_norm(lays.flatten(net), decay=0.9) z_mean = lays.fully_connected(net, z_dims) z_stdev = 0.5 * tf.nn.softplus(lays.fully_connected(net, z_dims)) # Reparameterization trick for Variational Autoencoder samples = tf.random_normal([tf.shape(z_mean)[0], z_dims], mean=0, stddev=1, dtype=tf.float32) print("rank and shape of samples", tf.rank(samples)) guessed_z = z_mean + tf.multiply(samples, z_stdev) print("rank and shape of guessed z", tf.rank(guessed_z)) l_space = guessed_z return z_mean, z_stdev, l_space
def build_model(self): ## placeholder for hyper-parameters self.bn_train = tf.placeholder('bool') self.learning_rate = tf.placeholder(tf.float32, shape=[]) ## placeholder for data self.x_i = tf.placeholder(tf.float32, shape=[None, self.n_samples, self.x_dim, self.x_dim, 1]) self.y_i_ind = tf.placeholder(tf.int32, shape=[None, self.n_samples]) self.y_i = tf.one_hot(self.y_i_ind, self.y_dim) self.x_hat = tf.placeholder(tf.float32, shape=[None, self.x_dim, self.x_dim, 1]) self.y_hat_ind = tf.placeholder(tf.int32, shape=[None]) self.y_hat = tf.one_hot(self.y_hat_ind, self.y_dim) ## batch normalization self.bn0 = batch_norm(epsilon=self.epsilon, momentum=self.bnDecay, name='bn0') self.bn1 = batch_norm(epsilon=self.epsilon, momentum=self.bnDecay, name='bn1') self.bn2 = batch_norm(epsilon=self.epsilon, momentum=self.bnDecay, name='bn2') self.bn3 = batch_norm(epsilon=self.epsilon, momentum=self.bnDecay, name='bn3') cos_sim_list = [] varscope = 'encode_x' self.x_hat_encode = self.conv_net(self.x_hat, scope=varscope, bn_train=self.bn_train) # [-1, 64] if not self.tie: varscope = 'encode_x_i' for i in range(self.n_samples): x_i_encode = self.conv_net(self.x_i[:,i,:,:,:], scope=varscope, bn_train=self.bn_train, reuse=(self.tie or i > 0)) # [-1, 64] dotted = tf.expand_dims(tf.reduce_sum(tf.multiply(self.x_hat_encode, x_i_encode), axis=1), axis=1) # [-1, 1] x_i_inv_mag = tf.rsqrt(tf.clip_by_value(tf.reduce_sum(tf.square(x_i_encode), 1, keep_dims=True), self.epsilon, float("inf"))) cos_sim_list.append(dotted * x_i_inv_mag) cos_sim = tf.concat(cos_sim_list, axis=1) # [-1, self.n_samples] weighting = tf.nn.softmax(cos_sim) label_prob = tf.squeeze(tf.matmul(tf.expand_dims(weighting, 1), self.y_i)) top_k = tf.nn.in_top_k(label_prob, self.y_hat_ind, 1) self.acc = tf.reduce_mean(tf.to_float(top_k)) correct_prob = tf.reduce_sum(tf.log(tf.clip_by_value(label_prob, self.epsilon, 1.0)) * self.y_hat, 1) self.loss = tf.reduce_mean(-correct_prob, 0) self.train_op = tf.train.AdamOptimizer(learning_rate=self.learning_rate, beta1=0.5).minimize(self.loss) ## Create model saver (max_to_keep = 5 by default) self.saver = tf.train.Saver()
def encoder1(tensor): # noise = tf.random_normal([mb_size, 64, 64, 1]) noise = tf.random_uniform([mb_size, 64, 64, 1], -1, 1) tensor = tf.concat([tensor, noise], axis=3) conv1 = layers.conv2d(tensor, 32, 5, stride=2, activation_fn=None, weights_initializer=initializer) conv1 = layers.batch_norm(conv1, activation_fn=tf.nn.relu) conv2 = layers.conv2d(conv1, 64, 5, stride=2, activation_fn=None, normalizer_fn=layers.batch_norm, weights_initializer=initializer) conv2 = layers.batch_norm(conv2, activation_fn=tf.nn.relu) conv3 = layers.conv2d(conv2, 128, 5, stride=2, activation_fn=None, normalizer_fn=layers.batch_norm, weights_initializer=initializer) conv3 = layers.batch_norm(conv3, activation_fn=tf.nn.relu) conv4 = layers.conv2d(conv3, 256, 5, stride=2, activation_fn=None, normalizer_fn=layers.batch_norm, weights_initializer=initializer) conv4 = layers.batch_norm(conv4, activation_fn=tf.nn.relu) conv5 = layers.conv2d(conv4, 512, 5, stride=2, activation_fn=None, normalizer_fn=layers.batch_norm, weights_initializer=initializer) conv5 = layers.batch_norm(conv5, activation_fn=tf.nn.relu) # fc1 = tf.reshape(conv4, shape=[-1, 2 * 2 * 512]) fc1 = layers.flatten(conv5) fc1 = layers.fully_connected( inputs=fc1, num_outputs=512, activation_fn=None, weights_initializer=initializer) fc1 = layers.batch_norm(fc1, activation_fn=lrelu) fc2 = layers.fully_connected(inputs=fc1, num_outputs=Z_dim, activation_fn=tf.nn.tanh, weights_initializer=initializer) return fc2
def conv_t_block(x, filter_size, stride_length, n_maps, output_shape, name): """ CNN block with cnn_trans, batch norm and lrelu repeated twice. The input Tensor's H and W is up-sampled only once. :param x: Tensor, input tensor :param filter_size: int or float, size of the square kernel to use :param stride_length: int, stride length to be applied :param n_maps: int, number of output maps :param output_shape: String, Name of the block on Tensorboard also prefixes this value for the variable name :param name: :return: Tensor, output tensor after passing it through the convolutional transpose block """ with tf.variable_scope(name): conv_t_1 = lrelu( batch_norm(cnn_2d_trans( x, weight_shape=[ filter_size, filter_size, n_maps, x.get_shape()[-1] ], strides=[1, stride_length, stride_length, 1], output_shape=output_shape, name='conv_t_1'), center=True, scale=True, is_training=True, scope='Batch_Norm_1')) conv_t_2 = lrelu( batch_norm(cnn_2d_trans(conv_t_1, weight_shape=[ filter_size, filter_size, n_maps, conv_t_1.get_shape()[-1] ], strides=[1, 1, 1, 1], output_shape=output_shape, name='conv_t_2'), center=True, scale=True, is_training=True, scope='Batch_Norm_2')) return conv_t_2
def conv_shortcut(x_in, n_filt, train_phase, keep_prob=None, use_leaky=False):
    # shortcut connections for residual
    in_dim = x_in.get_shape().as_list()[3]
    kernel_shape = [1, 1, in_dim, n_filt]
    with tf.variable_scope('shortcut'):
        weights = tf.get_variable("weights", kernel_shape,
                                  initializer=tf.contrib.layers.xavier_initializer())
        biases = tf.get_variable("biases", kernel_shape[-1],
                                 initializer=tf.constant_initializer(0.))
        conv = tf.nn.conv2d(x_in, weights, strides=[1, 1, 1, 1], padding='SAME')
        conv = batch_norm(conv, decay=0.99, is_training=train_phase, renorm=renorm)
    return conv
def _conv_bn_2d(self, inputs, out_channel, kernel_size, fn=None, scope=''):
    conv = layers.conv2d(inputs, out_channel, kernel_size,
                         activation_fn=None,
                         scope='_'.join((scope, 'conv')),
                         **self._reg)
    bn = layers.batch_norm(conv,
                           scale=True,
                           activation_fn=fn,
                           is_training=self._is_training,
                           scope='_'.join((scope, 'bn')))
    return bn
def encoder1(tensor): conv1 = layers.conv2d(tensor, 32, 4, stride=2, activation_fn=None, weights_initializer=initializer) conv1 = layers.batch_norm(conv1, activation_fn=lrelu) conv2 = layers.conv2d(conv1, 64, 4, stride=2, activation_fn=None, normalizer_fn=layers.batch_norm, weights_initializer=initializer) conv2 = layers.batch_norm(conv2, activation_fn=lrelu) conv3 = layers.conv2d(conv2, 128, 4, stride=2, activation_fn=None, normalizer_fn=layers.batch_norm, weights_initializer=initializer) # 8 x 8 x 128 conv3 = layers.batch_norm(conv3, activation_fn=lrelu) conv4 = layers.conv2d(conv3, 256, 4, stride=2, activation_fn=None, normalizer_fn=layers.batch_norm, weights_initializer=initializer) # 4 x 4 x 256 conv4 = layers.batch_norm(conv4, activation_fn=lrelu) conv5 = layers.conv2d(conv4, 512, 4, stride=2, activation_fn=None, normalizer_fn=layers.batch_norm, weights_initializer=initializer) # 2 x 2 x 512 conv5 = layers.batch_norm(conv5, activation_fn=lrelu) fc1 = tf.reshape(conv5, shape=[-1, 2 * 2 * 512]) fc1 = layers.fully_connected(inputs=fc1, num_outputs=512, activation_fn=None, weights_initializer=initializer) fc1 = layers.batch_norm(fc1, activation_fn=lrelu) fc2 = layers.fully_connected(inputs=fc1, num_outputs=40, activation_fn=tf.nn.sigmoid, weights_initializer=initializer) return fc2
def buildGeneratorTest(self, x_input, x_cond, bn_train): tempVec = tf.concat([x_input, x_cond], 1) tempDim = self.randomDim + self.condDim with tf.variable_scope('generator'): for i, genDim in enumerate(self.generatorDims[:-1]): W = tf.get_variable('W_' + str(i), shape=[tempDim, genDim], regularizer=l2_regularizer(self.l2scale)) h = tf.matmul(tempVec, W) h2 = batch_norm(h, decay=self.bnDecay, scale=True, is_training=bn_train, updates_collections=None, trainable=False) h3 = self.generatorActivation(h2) tempVecUse, rest = tf.split(tempVec, [genDim, tempDim - genDim], axis=1) tempVec = h3 + tempVecUse tempDim = genDim W = tf.get_variable('W' + str(i), shape=[tempDim, self.generatorDims[-1]], regularizer=l2_regularizer(self.l2scale)) h = tf.matmul(tempVec, W) h2 = batch_norm(h, decay=self.bnDecay, scale=True, is_training=bn_train, updates_collections=None, trainable=False) if self.dataType == 'binary': h3 = tf.nn.tanh(h2) else: h3 = tf.nn.relu(h2) output = h3 + tempVec return output
def buildGenerator(self, x_input, bn_train): tempVec = x_input tempDim = self.randomDim with tf.variable_scope('generator', regularizer=l2_regularizer(self.l2scale)): for i, genDim in enumerate(self.generatorDims[:-1]): W = tf.get_variable('W_'+str(i), shape=[tempDim, genDim]) h = tf.matmul(tempVec,W) h2 = batch_norm(h, decay=self.bnDecay, scale=True, is_training=bn_train, updates_collections=None) h3 = self.generatorActivation(h2) tempVec = h3 + tempVec tempDim = genDim W = tf.get_variable('W'+str(i), shape=[tempDim, self.generatorDims[-1]]) h = tf.matmul(tempVec,W) h2 = batch_norm(h, decay=self.bnDecay, scale=True, is_training=bn_train, updates_collections=None) if self.dataType == 'binary': h3 = tf.nn.tanh(h2) else: h3 = tf.nn.relu(h2) output = h3 + tempVec return output
def general_conv2d(inputconv, o_d=64, f_h=7, f_w=7, s_h=1, s_w=1, stddev=0.02,
                   padding="VALID", name="conv2d", do_norm=True, do_relu=True,
                   relufactor=0):
    with tf.variable_scope(name):
        conv = layers.conv2d(inputconv, o_d, f_w, s_w, padding,
                             activation_fn=None,
                             weights_initializer=tf.truncated_normal_initializer(stddev=stddev),
                             biases_initializer=tf.constant_initializer(0.0))
        if do_norm:
            # conv = instance_norm(conv)
            conv = layers.batch_norm(conv, decay=0.9, scope="batch_norm")
        if do_relu:
            if relufactor == 0:
                conv = tf.nn.relu(conv, "relu")
            else:
                conv = lrelu(conv, relufactor, "lrelu")
        return conv
def gconvbn(*args, **kwargs):
    scope = kwargs.pop('scope', None)
    with tf.variable_scope(scope):
        x = separable_conv2d(*args, **kwargs)
        c = args[-1]
        f = x.shape[-1].value // c
        g = f // c
        x = tf.reshape(x, tf.concat([tf.shape(x)[:-1], tf.constant([g, c, c])], axis=0))
        x = tf.reduce_sum(x, axis=-2)
        x = tf.reshape(x, tf.concat([tf.shape(x)[:-2], tf.constant([f])], axis=0),
                       name='gconv')
        return batch_norm(x)
def discriminator(x, y): yb = tf.reshape(y, [-1, 1, 1, 40]) h = tf.reshape(x, [-1, 64, 64, 3]) h = conv_cond_concat(h, yb) h = layers.conv2d(h, 64, 5, stride=2, padding='SAME', activation_fn=None, weights_initializer=initializer) h = layers.batch_norm(h, activation_fn=tf.nn.relu) # h = conv_cond_concat(h, yb) h = layers.conv2d(h, 64*2, 5, stride=2, padding='SAME', activation_fn=None, weights_initializer=initializer) h = layers.batch_norm(h, activation_fn=tf.nn.relu) # h = conv_cond_concat(h, yb) h = layers.conv2d(h, 64*4, 5, stride=2, padding='SAME', activation_fn=None, weights_initializer=initializer) h = layers.batch_norm(h, activation_fn=tf.nn.relu) # h = conv_cond_concat(h, yb) h = layers.conv2d(h, 64*8, 5, stride=2, padding='SAME', activation_fn=None, weights_initializer=initializer) h = layers.batch_norm(h, activation_fn=tf.nn.relu) h = layers.flatten(h) return layers.fully_connected(h, 1, activation_fn=tf.sigmoid)
def f2():
    """Test-phase batch normalization: reuses the training-phase variables.

    :return: the batch-normalized tensor for the test phase.
    """
    # print('batch_normalization: test phase')
    return tf_layer.batch_norm(inputdata,
                               is_training=False,
                               center=True,
                               scale=True,
                               updates_collections=None,
                               scope=name,
                               reuse=True)
def discriminator(self, x, y, reuse=False): with tf.variable_scope("discriminator", reuse=reuse): # mnist data's shape is (28 , 28 , 1) yb = tf.reshape(y, shape=[self.batch_size, 1, 1, self.y_dim]) # concat xy = conv_cond_concat(x, yb) conv1 = tf.layers.conv2d(xy, 10, 5, 2, padding="same", activation=lrelu) conv1 = conv_cond_concat(conv1, yb) conv2 = contrib_layers.batch_norm(tf.layers.conv2d(conv1, 64, 5, 2, padding="same"), activation_fn=lrelu) conv2 = tf.reshape(conv2, [self.batch_size, -1]) conv2 = tf.concat([conv2, y], 1) f1 = contrib_layers.batch_norm(tf.layers.dense(conv2, 1024), activation_fn=lrelu) f1 = tf.concat([f1, y], 1) out = tf.layers.dense(f1, 1, activation=None) return out
def conv_bn_layer(input_tensor, kernel_size, output_channels, initializer, stride=1, bn=False, is_training=True, relu=True): # with tf.variable_scope(name) as scope: conv_layer = layers.conv2d(inputs=input_tensor, num_outputs=output_channels, kernel_size=kernel_size, stride=stride, activation_fn=tf.identity, padding='SAME', weights_initializer=initializer) if bn and relu: #How to use Batch Norm: https://github.com/martin-gorner/tensorflow-mnist-tutorial/blob/master/README_BATCHNORM.md #Why scale is false when using ReLU as the next activation #https://datascience.stackexchange.com/questions/22073/why-is-scale-parameter-on-batch-normalization-not-needed-on-relu/22127 #Using fuse operation: https://www.tensorflow.org/performance/performance_guide#common_fused_ops conv_layer = layers.batch_norm(inputs=conv_layer, center=True, scale=False, is_training=is_training, fused=True) conv_layer = tf.nn.relu(conv_layer) if bn and not relu: conv_layer = layers.batch_norm(inputs=conv_layer, center=True, scale=True, is_training=is_training) # print('Conv layer {0} -> {1}'.format(input_tensor.get_shape().as_list(),conv_layer.get_shape().as_list())) return conv_layer
def SDAE_calculate(model_name, X_c, layer_structure, W, b, batch_normalization, f_act, g_act, model_keep_prob, V_u=None): hidden_value = X_c for itr1 in range(len(layer_structure) - 1): ''' Encoder ''' if itr1 <= int(len(layer_structure) / 2) - 1: if (itr1 == 0) and (model_name == "CDAE"): ''' V_u ''' before_activation = tf.add( tf.add(tf.matmul(hidden_value, W[itr1]), V_u), b[itr1]) else: before_activation = tf.add(tf.matmul(hidden_value, W[itr1]), b[itr1]) if batch_normalization == "True": before_activation = batch_norm(before_activation) hidden_value = f_act(before_activation) ''' Decoder ''' elif itr1 > int(len(layer_structure) / 2) - 1: before_activation = tf.add(tf.matmul(hidden_value, W[itr1]), b[itr1]) if batch_normalization == "True": before_activation = batch_norm(before_activation) hidden_value = g_act(before_activation) if itr1 < len(layer_structure) - 2: # add dropout except final layer hidden_value = tf.nn.dropout(hidden_value, model_keep_prob) if itr1 == int(len(layer_structure) / 2) - 1: Encoded_X = hidden_value sdae_output = hidden_value return Encoded_X, sdae_output
def conv_relu3_noscaling(x_in, n_filt, train_phase):
    in_dim = x_in.get_shape().as_list()[3]
    kernel_shape = [3, 3, in_dim, n_filt]
    weights = tf.get_variable(
        "weights", kernel_shape,
        initializer=tf.contrib.layers.variance_scaling_initializer())
    biases = tf.get_variable("biases", kernel_shape[-1],
                             initializer=tf.constant_initializer(0.))
    conv = tf.nn.conv2d(x_in, weights, strides=[1, 1, 1, 1], padding='SAME')
    conv = batch_norm(conv, decay=0.99, is_training=train_phase)
    return tf.nn.relu(conv + biases)
def conv_layer_with_bias_one(self, input, shape, name):
    with tf.variable_scope(name):
        input = tf.nn.conv2d(input,
                             self.get_weight_variable(shape, name=name + "_filter"),
                             [1, 1, 1, 1], padding='SAME')
        input = tf.nn.bias_add(input,
                               self.get_bias_variable([shape[3]], name=name + "_bias"))
        return tf.nn.relu(tcl.batch_norm(input, center=False, scope=name + "_bn"))
def generator(z, z_dim): """ Used to generate fake images to fool the discriminator. :param z: The input random noise. :param z_dim: The dimension of the input noise. :return: Fake images -> [BATCH_SIZE, IMAGE_SIZE, IMAGE_SIZE, 3] """ gf_dim = 64 z2 = dense(z, z_dim, gf_dim * 8 * 4 * 4, scope='g_h0_lin') h0 = tf.nn.relu( batch_norm(tf.reshape(z2, [-1, 4, 4, gf_dim * 8]), center=True, scale=True, is_training=True, scope='g_bn1')) h1 = tf.nn.relu( batch_norm(conv_transpose(h0, [mc.BATCH_SIZE, 8, 8, gf_dim * 4], "g_h1"), center=True, scale=True, is_training=True, scope='g_bn2')) h2 = tf.nn.relu( batch_norm(conv_transpose(h1, [mc.BATCH_SIZE, 16, 16, gf_dim * 2], "g_h2"), center=True, scale=True, is_training=True, scope='g_bn3')) h3 = tf.nn.relu( batch_norm(conv_transpose(h2, [mc.BATCH_SIZE, 32, 32, gf_dim * 1], "g_h3"), center=True, scale=True, is_training=True, scope='g_bn4')) h4 = conv_transpose(h3, [mc.BATCH_SIZE, 64, 64, 3], "g_h4") return tf.nn.tanh(h4)
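# Hedged usage sketch for the generator above (assumed; the z_dim value is
# illustrative and mc.BATCH_SIZE comes from the same module): sample a uniform
# noise batch and build the fake-image graph.
z_dim = 100
z = tf.random_uniform([mc.BATCH_SIZE, z_dim], minval=-1.0, maxval=1.0)
fake_images = generator(z, z_dim)  # -> [mc.BATCH_SIZE, 64, 64, 3]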
def create_deep_with_dropout_inference_op(self, images): """ Performs a forward pass estimating label maps from RGB images using a deep convolution net. :param images: The RGB images tensor. :type images: tf.Tensor :return: The label maps tensor. :rtype: tf.Tensor """ h_conv = conv_layer('conv1', images, 3, 16, strides=(1, 2, 2, 1), histogram_summary=True) h_conv = batch_norm(h_conv) h_conv = conv_layer('conv2', h_conv, 16, 32, strides=(1, 2, 2, 1), histogram_summary=True) h_conv = batch_norm(h_conv) h_conv = conv_layer('conv3', h_conv, 32, 64, strides=(1, 2, 2, 1), histogram_summary=True) h_conv = tf.nn.dropout(h_conv, self.dropout_keep_probability_tensor) h_conv = batch_norm(h_conv) h_conv = conv_layer('conv4', h_conv, 64, 128, strides=(1, 2, 2, 1), histogram_summary=True) h_conv = tf.nn.dropout(h_conv, self.dropout_keep_probability_tensor) h_conv = batch_norm(h_conv) h_conv = conv_layer('conv5', h_conv, 128, 256, strides=(1, 2, 2, 1), histogram_summary=True) h_conv = tf.nn.dropout(h_conv, self.dropout_keep_probability_tensor) h_conv = batch_norm(h_conv) h_conv = conv_layer('conv6', h_conv, 256, 256, conv_height=10, conv_width=10, strides=(1, 2, 2, 1), histogram_summary=True) h_conv_drop = tf.nn.dropout(h_conv, self.dropout_keep_probability_tensor) with tf.name_scope('fc1'): fc0_size = size_from_stride_two(self.settings.image_height, iterations=6) * size_from_stride_two( self.settings.image_width, iterations=6) * 256 fc1_size = 2 h_fc = tf.reshape(h_conv_drop, [-1, fc0_size]) w_fc = weight_variable([fc0_size, fc1_size]) b_fc = bias_variable([fc1_size]) predicted_labels = leaky_relu(tf.matmul(h_fc, w_fc) + b_fc) return predicted_labels
def conv1d(input_, input_size, output_dim, is_training=True, reuse=False, name='conv1d_layer'): with tf.variable_scope(name): with tf.variable_scope('first_layer'): net = tf.layers.conv1d(inputs=input_, filters=256, kernel_size=input_size, activation=None) net = layers.batch_norm(net, decay=0.9, updates_collections=None, activation_fn=None, center=True, scale=True, zero_debias_moving_mean=True, is_training=is_training) net = tf.nn.relu(net) with tf.variable_scope('second_layer'): net = tf.layers.conv1d(inputs=net, filters=output_dim * 10, kernel_size=1, activation=None) net = layers.batch_norm(net, decay=0.9, updates_collections=None, activation_fn=None, center=True, scale=True, zero_debias_moving_mean=True, is_training=is_training) net = tf.nn.relu(net) print(net) return net
def cust_conv2d(input_layer, out_dim, h_f=3, w_f=3, h_s=2, w_s=2, padding="SAME",
                scope_name=None, batch_norm=True,
                activation_fn=tf_utils.leaky_rectify, is_training=True):
    with tf.variable_scope(scope_name) as _:
        out = ly.conv2d(input_layer, out_dim, [w_f, h_f], [h_s, w_s], padding,
                        activation_fn=None)
        if batch_norm:
            out = ly.batch_norm(out, is_training=is_training,
                                updates_collections=None)
        if activation_fn:
            out = activation_fn(out)
        return out
def conv_layer(net, filters=32, hyperparameter=False, activation=tf.nn.relu, stride=1, max_pool=True, var_coll=far.HYPERPARAMETERS_COLLECTIONS, conv_initialization=tf.contrib.layers.xavier_initializer_conv2d(tf.float32)): max_pool_stride = [1, 2, 2, 1] bn = lambda _inp: tcl.batch_norm(_inp, variables_collections=var_coll) net + tcl.conv2d(net.out, num_outputs=filters, stride=stride, kernel_size=3, normalizer_fn=bn, activation_fn=None, trainable=not hyperparameter, variables_collections=var_coll, weights_initializer=conv_initialization) net + activation(net.out) if max_pool: net + tf.nn.max_pool(net.out, max_pool_stride, max_pool_stride, 'VALID')
def fc_bn(x_, nh, scope, is_training, nonlinearity=None, reg_coef=5e-5):
    with tf.variable_scope(scope):
        h1 = layers.fully_connected(x_, nh,
                                    activation_fn=None,
                                    biases_initializer=None,
                                    reuse=tf.AUTO_REUSE,
                                    scope='without_bn')
        h2 = layers.batch_norm(h1,
                               decay=1 - reg_coef,
                               scale=True,
                               epsilon=1e-4,
                               activation_fn=nonlinearity,
                               is_training=is_training,
                               reuse=tf.AUTO_REUSE,
                               scope='bn')
    return h2
def generator(self, inputs, reuse=False, is_training=False, name='Generator'): with tf.variable_scope(name) as scope: if reuse: scope.reuse_variables() #Project Latent_size vector into 7*7*64 tensor with tf.name_scope('G_layer0'): g_0 = tf.cast(layer.fully_connected(inputs=inputs, num_outputs=7 * 7 * 64, activation_fn=None, scope='Linear'), dtype=tf.float32) g_0 = layer.batch_norm(g_0, is_training=is_training, scope='Batch_n') g_0 = tf.nn.relu(g_0, name='Relu') g_0 = tf.reshape(g_0, shape=[-1, 7, 7, 64]) #Projector1 g_1 = self.conv2d_transpose_bn(inputs=g_0, output_num=32, filter_size=5, stride=2, is_training=is_training, name='G_layer1_conv') #Projector2 g_2 = self.conv2d_transpose_bn(inputs=g_1, output_num=16, filter_size=5, stride=1, is_training=is_training, name='G_layer2_conv') #Projector3 g_3 = self.conv2d_transpose_bn(inputs=g_2, output_num=8, filter_size=5, stride=2, is_training=is_training, name='G_layer3_conv') #Projector4 g_4 = layer.conv2d_transpose(inputs=g_3, num_outputs=1, kernel_size=5, stride=1, padding='SAME', activation_fn=tf.nn.tanh, scope='G_layer4_conv') return g_4
def Conv(data, num_filter=1, kernel=(1, 1), stride=(1, 1), pad=(0, 0), num_group=1, name=None):
    assert stride[0] == stride[1]
    stride = stride[0]
    assert (kernel == (3, 3) and pad == (1, 1)) or (kernel == (1, 1) and pad == (0, 0))
    pad = 'SAME'
    assert num_group == 1 or num_group == data.get_shape().as_list()[-1]
    with tf.variable_scope(name):
        if num_group == 1:
            y = layers.conv2d(data, num_filter, kernel, stride, 'SAME')
        else:
            y = layers.separable_conv2d(data, None, kernel, 1, stride, 'SAME')
        y = layers.batch_norm(y)
        y = prelu(y)
    return y
def residual_conv_block(net, num_filters, kernel_size, stride, is_training=True):
    # let us cache the input tensor and downsample it
    inp = tfl.avg_pool2d(net, kernel_size, stride, padding="SAME")
    # now convolve with stride (potential downsampling)
    net = tfl.conv2d(net, num_filters, kernel_size, stride,
                     activation_fn=tf.identity, padding="SAME")
    # normalize the output
    net = tfl.batch_norm(net, is_training=is_training, activation_fn=tf.identity)
    # now convolve again but do not downsample
    net = tfl.conv2d(net, num_filters, kernel_size, stride=1,
                     activation_fn=tf.identity, padding="SAME")
    return prelu(tf.concat((net, inp), axis=-1))
def regularization(x, opts, train, reuse=None, prefix=''):
    if 'x_' not in prefix:
        if opts.batch_norm:
            x = layers.batch_norm(x, decay=0.9, center=True, scale=True,
                                  is_training=train, scope=prefix + '_bn',
                                  reuse=reuse)
        x = tf.nn.relu(x)
    x = x if not opts.cnn_layer_dropout else layers.dropout(
        x, keep_prob=opts.dropout_keep_prob, scope=prefix + '_dropout')
    return x
def my_bidirectional_dynamic_rnn(self, inp, mask): with tf.variable_scope("my_bi_rnn"): mask = tf.cast(mask, tf.float32) self.my_cell_fw = self.create_cell() self.my_cell_bw = self.create_cell() self.outputs, self.states = tf.nn.bidirectional_dynamic_rnn( cell_fw=self.my_cell_fw, cell_bw=self.my_cell_bw, inputs=inp, dtype=tf.float32) self.output_fw, self.output_bw = self.outputs self.state_fw, self.state_bw = self.states self.output_fw = self.output_fw * mask[:, :, None] self.output_bw = self.output_bw * mask[:, :, None] self.output_fw = batch_norm(self.output_fw, is_training=self.is_training, updates_collections=None) self.output_bw = batch_norm(self.output_bw, is_training=self.is_training, updates_collections=None) return self.output_fw, self.output_bw, self.state_fw, self.state_bw
def __call__(self, image_input, training=False, dropout_rate=0.0): """ Runs the CNN producing the predictions and the gradients. :param image_input: Image input to produce embeddings for. e.g. for EMNIST [batch_size, 28, 28, 1] :param training: A flag indicating training or evaluation :param dropout_rate: A tf placeholder of type tf.float32 indicating the amount of dropout applied :return: Embeddings of size [batch_size, self.num_classes] """ with tf.variable_scope(self.name, reuse=self.reuse): layer_features = [] with tf.variable_scope('FCCLayerNet'): outputs = image_input for i in range(len(self.layer_stage_sizes)): with tf.variable_scope('conv_stage_{}'.format(i)): for j in range(self.inner_layer_depth): with tf.variable_scope('conv_{}_{}'.format(i, j)): outputs = tf.layers.dense( outputs, units=self.layer_stage_sizes[i]) outputs = leaky_relu( outputs, name="leaky_relu{}".format(i)) layer_features.append(outputs) if self.batch_norm_use: outputs = batch_norm(outputs, decay=0.99, scale=True, center=True, is_training=training, renorm=False) outputs = tf.layers.dropout(outputs, rate=dropout_rate, training=training) # apply dropout only at dimensionality # reducing steps, i.e. the last layer in # every group c_conv_encoder = outputs c_conv_encoder = tf.contrib.layers.flatten(c_conv_encoder) c_conv_encoder = tf.layers.dense(c_conv_encoder, units=self.num_classes) self.reuse = True self.variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=self.name) if not self.build_completed: self.build_completed = True count_parameters(self.variables, "FCCLayerNet") return c_conv_encoder, layer_features
def model(): tf.set_random_seed(1) print("building model ...") with tf.variable_scope('train'): print("building model ...") X_pl = tf.placeholder(tf.float32, [None, num_features]) print("X_pl", X_pl.get_shape()) t_pl = tf.placeholder(tf.int32, [None,]) print("t_pl", t_pl.get_shape()) is_training_pl = tf.placeholder(tf.bool) X_bn = batch_norm(X_pl, is_training=is_training_pl) print("X_bn", X_bn.get_shape()) l1 = fully_connected(X_pl, num_outputs=100, activation_fn=relu)#, normalizer_fn=batch_norm) print("l1", l1.get_shape()) l1_drop = dropout(l1, is_training=is_training_pl) print("l1_drop", l1_drop.get_shape()) l_out = fully_connected(l1_drop, num_outputs=num_classes, activation_fn=None) print("l_out", l_out.get_shape()) l_out_softmax = tf.nn.softmax(l_out) tf.contrib.layers.summarize_variables() with tf.variable_scope('metrics'): loss = sparse_softmax_cross_entropy_with_logits(l_out, t_pl) print("loss", loss.get_shape()) loss = tf.reduce_mean(loss) print("loss", loss.get_shape()) tf.summary.scalar('train/loss', loss) argmax = tf.to_int32(tf.argmax(l_out, 1)) print("argmax", argmax.get_shape()) correct = tf.to_float(tf.equal(argmax, t_pl)) print("correct,", correct.get_shape()) accuracy = tf.reduce_mean(correct) print("accuracy", accuracy.get_shape()) with tf.variable_scope('optimizer'): print("building optimizer ...") global_step = tf.Variable(0, name='global_step', trainable=False) optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate) grads_and_vars = optimizer.compute_gradients(loss) gradients, variables = zip(*grads_and_vars) clipped_gradients, global_norm = ( tf.clip_by_global_norm(gradients, clip_norm)) clipped_grads_and_vars = zip(clipped_gradients, variables) tf.summary.scalar('train/global_gradient_norm', global_norm) train_op = optimizer.apply_gradients(clipped_grads_and_vars, global_step=global_step) return X_pl, t_pl, is_training_pl, l_out, l_out_softmax, loss, accuracy, train_op, global_step
def conv_relu3(x_in, n_filt, train_phase, keep_prob=None, use_leaky=False):
    in_dim = x_in.get_shape().as_list()[3]
    kernel_shape = [3, 3, in_dim, n_filt]
    weights = tf.get_variable("weights", kernel_shape,
                              initializer=tf.contrib.layers.xavier_initializer())
    biases = tf.get_variable("biases", kernel_shape[-1],
                             initializer=tf.constant_initializer(0.))
    conv = tf.nn.conv2d(x_in, weights, strides=[1, 1, 1, 1], padding='SAME')
    conv = batch_norm(conv, decay=0.99, is_training=train_phase)
    if keep_prob is not None:
        conv = tf.nn.dropout(conv, keep_prob)
    if use_leaky:
        return tf.nn.leaky_relu(conv + biases)
    else:
        return tf.nn.relu(conv + biases)
def batchNormalization(input, is_training=True, reuse=None, name=None, decay=0.999,
                       center=True, scale=True, epsilon=0.001, updates_collections=None):
    return contrib_layers.batch_norm(inputs=input,
                                     is_training=is_training,
                                     reuse=reuse,
                                     scope=name,
                                     decay=decay,
                                     center=center,
                                     scale=scale,
                                     epsilon=epsilon,
                                     updates_collections=updates_collections)
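# Hedged usage sketch for the wrapper above (illustrative names). Because
# updates_collections defaults to None here, moving averages are updated in place
# during training, and the same scope can be reused with is_training=False for
# the evaluation graph.
import tensorflow as tf

inputs = tf.placeholder(tf.float32, [None, 256])
train_out = batchNormalization(inputs, is_training=True, name='fc_bn')
eval_out = batchNormalization(inputs, is_training=False, reuse=True, name='fc_bn')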
from tensorflow.contrib.layers import batch_norm #from batch_norm import batch_norm_new as batch_norm gpu() tf.reset_default_graph() # Two layer network. phase_train_mdn = tf.placeholder(tf.bool) x_flat = tf.placeholder(tf.float32,[3,5]) weights1 = tf.get_variable("weights1", [5, 3], initializer=tf.random_normal_initializer()) biases1 = tf.get_variable("biases1", 1, initializer=tf.constant_initializer(0)) hidden1_b = tf.matmul(x_flat, weights1) hidden1 = batch_norm(hidden1_b, decay=0.8, is_training=phase_train_mdn) hidden1 = hidden1 + biases1 weights2 = tf.get_variable("weights2", [3, 1], initializer=tf.contrib.layers.xavier_initializer()) biases2 = tf.get_variable("biases2", 1, initializer=tf.constant_initializer(0)) hidden2 = tf.matmul(hidden1, weights2) + biases2 y = tf.placeholder(tf.float32,[3,]) step = tf.placeholder(tf.int32) loss = tf.nn.l2_loss(hidden2-y) # learning_rate = tf.train.exponential_decay(
def build_sentence_pair_model(num_timesteps, vocab_size, classifier_fn, is_training, num_classes, train_embeddings=True, initial_embeddings=None): initializer = tf.random_uniform_initializer(-0.005, 0.005) with tf.variable_scope("PairModel", initializer=initializer): ys = tf.placeholder(tf.int32, (FLAGS.batch_size,), "ys") assert FLAGS.model_dim % 2 == 0, "model_dim must be even; we're using LSTM memory cells which are divided in half" def embedding_project_fn(embeddings): if FLAGS.embedding_dim != FLAGS.model_dim: # Need to project embeddings to model dimension. embeddings = util.Linear(embeddings, FLAGS.model_dim, bias=False) if FLAGS.embedding_batch_norm: embeddings = layers.batch_norm(embeddings, center=True, scale=True, is_training=True) if FLAGS.embedding_keep_rate < 1.0: embeddings = tf.cond(is_training, lambda: tf.nn.dropout(embeddings, FLAGS.embedding_keep_rate), lambda: embeddings / FLAGS.embedding_keep_rate) return embeddings # NB: tf.make_template enforces that weights of functions are shared # across the two stack models. ts_args = { "compose_fn": tf.make_template("ts_compose", util.TreeLSTMLayer), "tracking_fn": tf.make_template("ts_track", util.LSTMLayer), "transition_fn": None, "embedding_project_fn": tf.make_template("ts_embedding_project", embedding_project_fn), "batch_size": FLAGS.batch_size, "vocab_size": vocab_size, "num_timesteps": num_timesteps, "model_dim": FLAGS.model_dim, "embedding_dim": FLAGS.embedding_dim, "tracking_dim": FLAGS.tracking_dim, "is_training": is_training, "embeddings": initial_embeddings, } with tf.variable_scope("s1"): ts_1 = ThinStack(**ts_args) with tf.variable_scope("s2"): ts_2 = ThinStack(**ts_args) # Extract just the hidden value of the LSTM (not cell state) repr_dim = FLAGS.model_dim / 2 ts_1_repr = ts_1.final_representations[:, :repr_dim] ts_2_repr = ts_2.final_representations[:, :repr_dim] # Now prep return representations mlp_inputs = [ts_1_repr, ts_2_repr] if FLAGS.use_difference_feature: mlp_inputs.append(ts_2_repr - ts_1_repr) if FLAGS.use_product_feature: mlp_inputs.append(ts_1_repr * ts_2_repr) mlp_input = tf.concat(1, mlp_inputs) if FLAGS.sentence_repr_batch_norm: mlp_input = layers.batch_norm(mlp_input, center=True, scale=True, is_training=True, scope="sentence_repr_bn") if FLAGS.sentence_repr_keep_rate < 1.0: mlp_input = tf.cond(is_training, lambda: tf.nn.dropout(mlp_input, FLAGS.sentence_repr_keep_rate, name="sentence_repr_dropout"), lambda: mlp_input / FLAGS.sentence_repr_keep_rate) logits = classifier_fn(mlp_input) assert logits.get_shape()[1] == num_classes xent_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, ys) xent_loss = tf.reduce_mean(xent_loss) tf.scalar_summary("xent_loss", xent_loss) rewards = build_rewards(logits, ys) tf.scalar_summary("avg_reward", tf.reduce_mean(rewards)) params = tf.trainable_variables() if not train_embeddings: params.remove(ts_1.embeddings) try: params.remove(ts_2.embeddings) except: pass l2_loss = tf.add_n([tf.reduce_sum(tf.square(param)) for param in params]) tf.scalar_summary("l2_loss", l2_loss) total_loss = xent_loss + FLAGS.l2_lambda * l2_loss xent_gradients = zip(tf.gradients(total_loss, params), params) # TODO enable for transition_fn != None # rl1_gradients = reinforce_episodic_gradients( # ts_1.p_transitions, ts_1.sampled_transitions, rewards, # params=params) # rl2_gradients = reinforce_episodic_gradients( # ts_2.p_transitions, ts_2.sampled_transitions, rewards, # params=params) rl1_gradients, rl2_gradients = [], [] # TODO store magnitudes in summaries? 
gradients = xent_gradients + rl1_gradients + rl2_gradients return (ts_1, ts_2), logits, ys, gradients
def create_network_residual(self): def conv_residual(x_in,n_filt, train_phase): in_dim = x_in.get_shape().as_list()[3] kernel_shape = [3, 3, in_dim, n_filt] weights = tf.get_variable("weights", kernel_shape, initializer=tf.contrib.layers.xavier_initializer()) biases = tf.get_variable("biases", kernel_shape[-1], initializer=tf.constant_initializer(0.)) conv = tf.nn.conv2d(x_in, weights, strides=[1, 1, 1, 1], padding='SAME') conv = batch_norm(conv, decay=0.99, is_training=train_phase) return conv m_sz = min(self.conf.imsz)/self.conf.unet_rescale max_layers = int(math.ceil(math.log(m_sz,2)))-1 sel_sz = self.conf.sel_sz n_layers = int(math.ceil(math.log(sel_sz,2)))+2 n_layers = min(max_layers,n_layers) - 2 # n_layers = 6 n_conv = self.n_conv conv = lambda a, b: conv_relu3( a,b,self.ph['phase_train'], keep_prob=None, use_leaky=self.conf.unet_use_leaky) layers = [] up_layers = [] layers_sz = [] X = self.inputs[0] n_out = self.conf.n_classes all_layers = [] # downsample n_filt = 128 n_filt_base = 32 max_filt = 512 # n_filt_base = 16 # max_filt = 256 for ndx in range(n_layers): n_filt = min(max_filt, n_filt_base * (2** (ndx))) if ndx == 0: with tf.variable_scope('layerdown_{}'.format(ndx)): X_sh = conv_residual(X, n_filt, self.ph['phase_train']) else: X_sh = X X_in = X with tf.variable_scope('layerdown_{}_0'.format(ndx)): X = conv_residual(X, n_filt, self.ph['phase_train']) X = tf.nn.leaky_relu(X) with tf.variable_scope('layerdown_{}_1'.format(ndx)): X = conv_residual(X, n_filt, self.ph['phase_train']) X = X + X_sh X = tf.nn.leaky_relu(X) all_layers.append(X) layers.append(X) layers_sz.append(X.get_shape().as_list()[1:3]) in_dim = X.get_shape().as_list()[3] n_filt = min(max_filt, n_filt_base * (2** (ndx+1))) kernel_shape = [3, 3, in_dim, n_filt] with tf.variable_scope('layerdown_{}_2'.format(ndx)): weights = tf.get_variable("weights1", kernel_shape, initializer=tf.contrib.layers.xavier_initializer()) biases = tf.get_variable("biases1", kernel_shape[-1], initializer=tf.constant_initializer(0.)) conv = tf.nn.conv2d(X, weights, strides=[1, 2, 2, 1], padding='SAME') conv = batch_norm(conv, decay=0.99, is_training=self.ph['phase_train']) X = conv # X = tf.nn.relu(conv + biases) self.down_layers = layers # few more convolution for the final layers top_layers = [] X_top_in = X n_filt = min(max_filt, n_filt_base * (2** (n_layers))) with tf.variable_scope('top_layer_{}_0'.format(n_layers)): X = conv_residual(X, n_filt, self.ph['phase_train']) X = tf.nn.leaky_relu(X) with tf.variable_scope('top_layer_{}_1'.format(n_layers)): X = conv_residual(X, n_filt, self.ph['phase_train']) X += X_top_in X = tf.nn.leaky_relu(X) top_layers.append(X) self.top_layers = top_layers all_layers.extend(top_layers) # upsample for ndx in reversed(range(n_layers)): X = CNB.upscale('u_'.format(ndx), X, layers_sz[ndx]) n_filt = min(max_filt, n_filt_base* (2** ndx)) with tf.variable_scope('layerup_{}'.format(ndx)): X = conv_residual(X, n_filt, self.ph['phase_train']) X = X + layers[ndx] X_in = X with tf.variable_scope('layerup_{}_0'.format(ndx)): X = conv_residual(X, n_filt, self.ph['phase_train']) X = tf.nn.leaky_relu(X) with tf.variable_scope('layerup_{}_1'.format(ndx)): X = conv_residual(X, n_filt, self.ph['phase_train']) X += X_in X = tf.nn.leaky_relu(X) all_layers.append(X) up_layers.append(X) self.all_layers = all_layers self.up_layers = up_layers # final conv weights = tf.get_variable("out_weights", [3,3,n_filt,n_out], initializer=tf.contrib.layers.xavier_initializer()) biases = tf.get_variable("out_biases", n_out, 
initializer=tf.constant_initializer(0.)) conv = tf.nn.conv2d(X, weights, strides=[1, 1, 1, 1], padding='SAME') X = tf.add(conv, biases, name = 'unet_pred') # X = conv+biases return X
def build_sentence_pair_rnn_model(num_timesteps, vocab_size, classifier_fn, is_training,
                                  num_classes, train_embeddings=True,
                                  initial_embeddings=None):
    with tf.variable_scope("PairRNNModel"):
        ys = tf.placeholder(tf.int32, (FLAGS.batch_size,), "ys")
        assert FLAGS.model_dim % 2 == 0, "model_dim must be even; we're using LSTM memory cells which are divided in half"

        def embedding_project_fn(embeddings):
            if FLAGS.embedding_dim != FLAGS.model_dim:
                # Need to project embeddings to model dimension.
                embeddings = util.Linear(embeddings, FLAGS.model_dim, bias=False)
            if FLAGS.embedding_batch_norm:
                embeddings = layers.batch_norm(embeddings, center=True, scale=True,
                                               is_training=True)
            if FLAGS.embedding_keep_rate < 1.0:
                embeddings = tf.cond(is_training,
                                     lambda: tf.nn.dropout(embeddings, FLAGS.embedding_keep_rate),
                                     lambda: embeddings / FLAGS.embedding_keep_rate)
            return embeddings

        # Share scope across the two models. (==> shared embedding projection /
        # BN weights)
        embedding_project_fn = tf.make_template("embedding_project", embedding_project_fn)

        s1_inputs = [tf.placeholder(tf.int32, (FLAGS.batch_size,), "s1_input_%i" % t)
                     for t in range(num_timesteps)]
        s2_inputs = [tf.placeholder(tf.int32, (FLAGS.batch_size,), "s2_input_%i" % t)
                     for t in range(num_timesteps)]
        s1_lengths = tf.placeholder(tf.int32, (FLAGS.batch_size,), "s1_lengths")
        s2_lengths = tf.placeholder(tf.int32, (FLAGS.batch_size,), "s2_lengths")

        with tf.device("/cpu:0"):
            embeddings = tf.get_variable("embeddings", (vocab_size, FLAGS.embedding_dim))
            s1_embedded = [embedding_project_fn(tf.nn.embedding_lookup(embeddings, s1_input_t))
                           for s1_input_t in s1_inputs]
            s2_embedded = [embedding_project_fn(tf.nn.embedding_lookup(embeddings, s2_input_t))
                           for s2_input_t in s2_inputs]

        cell = tf.nn.rnn_cell.BasicLSTMCell(FLAGS.model_dim / 2)
        with tf.variable_scope("s1"):
            _, s1_state = tf.nn.rnn(cell, s1_embedded, dtype=tf.float32,
                                    sequence_length=s1_lengths)
        with tf.variable_scope("s2"):
            _, s2_state = tf.nn.rnn(cell, s2_embedded, dtype=tf.float32,
                                    sequence_length=s2_lengths)

        # Now prep return representations
        mlp_inputs = [s1_state, s2_state]
        if FLAGS.use_difference_feature:
            mlp_inputs.append(s2_state - s1_state)
        if FLAGS.use_product_feature:
            mlp_inputs.append(s1_state * s2_state)
        mlp_input = tf.concat(1, mlp_inputs)

        if FLAGS.sentence_repr_batch_norm:
            mlp_input = layers.batch_norm(mlp_input, center=True, scale=True,
                                          is_training=True, scope="sentence_repr_bn")
        if FLAGS.sentence_repr_keep_rate < 1.0:
            mlp_input = tf.cond(is_training,
                                lambda: tf.nn.dropout(mlp_input, FLAGS.sentence_repr_keep_rate,
                                                      name="sentence_repr_dropout"),
                                lambda: mlp_input / FLAGS.sentence_repr_keep_rate)

        logits = classifier_fn(mlp_input)
        assert logits.get_shape()[1] == num_classes

        xent_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, ys)
        xent_loss = tf.reduce_mean(xent_loss)
        tf.scalar_summary("xent_loss", xent_loss)

        rewards = build_rewards(logits, ys)
        tf.scalar_summary("avg_reward", tf.reduce_mean(rewards))

        params = tf.trainable_variables()
        if not train_embeddings:
            params.remove(embeddings)
        l2_loss = tf.add_n([tf.reduce_sum(tf.square(param)) for param in params])
        tf.scalar_summary("l2_loss", l2_loss)
        total_loss = xent_loss + FLAGS.l2_lambda * l2_loss
        xent_gradients = zip(tf.gradients(total_loss, params), params)

        return None, logits, ys, xent_gradients