def weighted2pred(self, arg1, arg2, bias=True, activation=None):
    """Combine two inputs with separate dense projections and predict classes.

    Computes ``hstar = W1 * arg1 + W2 * arg2`` (optionally passed through
    ``activation``) and then ``softmax(W * hstar + b)``.

    Args:
        arg1: Tensor fed to the first bias-free dense projection.
        arg2: Tensor fed to the second bias-free dense projection.
        bias: Unused by this method; kept for interface compatibility. The
            two input projections never use a bias and the prediction layer
            always does.
        activation: Optional callable applied to the summed projections.
            When ``None`` the sum is used as is.

    Returns:
        The softmax tensor (op name ``"pred"``) produced from a dense layer
        with ``self.config.num_classes`` units.
    """
    with tf.variable_scope("weighted2predict_layer"):
        weighted_arg1 = tf.layers.dense(
            inputs=arg1,
            units=self.config.hidden_units,
            activation=None,
            use_bias=False,
            kernel_regularizer=l2_regularizer(self.config.l2_strength),
            name="weight_arg1")
        weighted_arg2 = tf.layers.dense(
            inputs=arg2,
            units=self.config.hidden_units,
            activation=None,
            use_bias=False,
            kernel_regularizer=l2_regularizer(self.config.l2_strength),
            name="weight_arg2")

        # The previous code referenced the undefined names `acivation`,
        # `weight_arg1` and `weight_arg2`, which raised NameError on every
        # call. The op named "hstar" is still the pre-activation sum.
        hstar = tf.add(weighted_arg1, weighted_arg2, name="hstar")
        if activation is not None:
            hstar = activation(hstar)

        h_predict = tf.layers.dense(
            inputs=hstar,
            units=self.config.num_classes,
            activation=None,
            use_bias=True,
            kernel_regularizer=l2_regularizer(self.config.l2_strength),
            name="h_predict")
        pred = tf.nn.softmax(h_predict, name="pred")
    return pred
def conv1d(self, net, num_ker, ker_size, stride):
    """Apply a 1-D convolution expressed as a 2-D convolution.

    The kernel is ``[ker_size, 1]`` and the stride ``[stride, 1]``, so the
    second spatial axis is left untouched. No activation or normalizer is
    applied; weights are L2-regularized with ``self.weight_decay``.

    Args:
        net: Input tensor passed to ``convolution2d``.
        num_ker: Number of output channels.
        ker_size: Kernel extent along the first spatial axis.
        stride: Stride along the first spatial axis.

    Returns:
        The convolved tensor.
    """
    conv_options = dict(
        num_outputs=num_ker,
        kernel_size=[ker_size, 1],
        stride=[stride, 1],
        padding='SAME',
        activation_fn=None,
        normalizer_fn=None,
        weights_initializer=variance_scaling_initializer(),
        weights_regularizer=l2_regularizer(self.weight_decay),
        biases_initializer=tf.zeros_initializer,
    )
    return convolution2d(net, **conv_options)
def __call__(self, x, is_training=True):
    """Run the encoder-decoder network on ``x``.

    The encoder is one 4x4 convolution followed by ten ``resBlock`` calls
    whose channel count doubles at every stride-2 block. The decoder is a
    stack of sixteen 4x4 transposed convolutions (ReLU) followed by a final
    3-channel transposed convolution with a sigmoid activation. All layers
    are built under ``tf.variable_scope(self.name)``.

    Args:
        x: Input tensor for the first convolution. The original comments
            assume a 256 x 256 x 3 image per example -- TODO confirm.
        is_training: Forwarded to every ``tcl.batch_norm`` call.

    Returns:
        The output of the final sigmoid transposed convolution.
    """
    size = 16
    kernel = 4

    # (channel multiplier, stride) for each residual block, in call order.
    encoder_plan = (
        (2, 2), (2, 1),
        (4, 2), (4, 1),
        (8, 2), (8, 1),
        (16, 2), (16, 1),
        (32, 2), (32, 1),
    )
    # (output channels, stride) for each ReLU transposed convolution.
    decoder_plan = (
        (size * 32, 1),
        (size * 16, 2), (size * 16, 1), (size * 16, 1),
        (size * 8, 2), (size * 8, 1), (size * 8, 1),
        (size * 4, 2), (size * 4, 1), (size * 4, 1),
        (size * 2, 2), (size * 2, 1),
        (size, 2), (size, 1),
        (3, 1), (3, 1),
    )

    with tf.variable_scope(self.name):
        with arg_scope([tcl.batch_norm], is_training=is_training, scale=True):
            with arg_scope([tcl.conv2d, tcl.conv2d_transpose],
                           activation_fn=tf.nn.relu,
                           normalizer_fn=tcl.batch_norm,
                           biases_initializer=None,
                           padding='SAME',
                           weights_regularizer=tcl.l2_regularizer(0.0002)):
                # Encoder: stem convolution, then the residual stack.
                se = tcl.conv2d(x, num_outputs=size, kernel_size=kernel,
                                stride=1)
                for multiplier, block_stride in encoder_plan:
                    se = resBlock(se,
                                  num_outputs=size * multiplier,
                                  kernel_size=kernel,
                                  stride=block_stride)

                # Decoder: transposed convolutions back up to 3 channels.
                pd = se
                for channels, layer_stride in decoder_plan:
                    pd = tcl.conv2d_transpose(pd, channels, kernel,
                                              stride=layer_stride)

                # Final layer overrides the scope's ReLU with a sigmoid.
                pos = tcl.conv2d_transpose(pd, 3, kernel, stride=1,
                                           activation_fn=tf.nn.sigmoid)
                return pos
def __init__(self, params, device_assigner=None,
             optimizer_class=adagrad.AdagradOptimizer, **kwargs):
    """Store hyper-parameters and build the optimizer and regularizer.

    Args:
        params: Object exposing ``learning_rate``, ``regression``,
            ``regularization`` and ``regularization_strength``.
        device_assigner: Optional device assigner; a
            ``tensor_forest.RandomForestDeviceAssigner`` is created when a
            falsy value is given.
        optimizer_class: Callable invoked with the learning rate to build
            the optimizer.
        **kwargs: Accepted and ignored.
    """
    if not device_assigner:
        device_assigner = tensor_forest.RandomForestDeviceAssigner()
    self.device_assigner = device_assigner

    self.params = params
    self.optimizer = optimizer_class(self.params.learning_rate)
    self.is_regression = params.regression

    # Only "l1" and "l2" select a regularizer; anything else leaves None.
    self.regularizer = None
    for kind, make_regularizer in (("l1", layers.l1_regularizer),
                                   ("l2", layers.l2_regularizer)):
        if params.regularization == kind:
            self.regularizer = make_regularizer(
                self.params.regularization_strength)
            break
def __init__(self, params, device_assigner=None,
             optimizer_class=adagrad.AdagradOptimizer, **kwargs):
    """Store hyper-parameters and build the optimizer and regularizer.

    Args:
        params: Object exposing ``learning_rate``, ``regression``,
            ``regularization`` and ``regularization_strength``.
        device_assigner: Optional device assigner; a
            ``framework_variables.VariableDeviceChooser`` is created when a
            falsy value is given.
        optimizer_class: Callable invoked with the learning rate to build
            the optimizer.
        **kwargs: Accepted and ignored.
    """
    if not device_assigner:
        device_assigner = framework_variables.VariableDeviceChooser()
    self.device_assigner = device_assigner

    self.params = params
    self.optimizer = optimizer_class(self.params.learning_rate)
    self.is_regression = params.regression

    # Only "l1" and "l2" select a regularizer; anything else leaves None.
    self.regularizer = None
    for kind, make_regularizer in (("l1", layers.l1_regularizer),
                                   ("l2", layers.l2_regularizer)):
        if params.regularization == kind:
            self.regularizer = make_regularizer(
                self.params.regularization_strength)
            break
import tensorflow as tf
import tensorflow.contrib.layers as cont_layers
from cnn import mnist_conf
from cnn import infer

# Configuration object and the L2 regularizer handed to the inference graph.
conf = mnist_conf.Conf()
regul = cont_layers.l2_regularizer(conf.regularizer_weight)

# Input placeholders: 28x28 single-channel images and integer class labels.
with tf.variable_scope("input"):
    x = tf.placeholder(dtype=tf.float32,
                       shape=[None, 28, 28, 1],
                       name="input-x")
    y = tf.placeholder(dtype=tf.int32, shape=[None], name="input-y")

logits = infer.cnn_infer(x, regul)

# Non-trainable step counter, incremented by the optimizer below.
global_step = tf.get_variable("global_step",
                              shape=[],
                              initializer=tf.zeros_initializer(),
                              trainable=False)

with tf.variable_scope("optimizer"):
    per_example_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=y, logits=logits)
    cross_entropy = tf.reduce_mean(per_example_loss)
    # Everything registered in the "losses" collection is added on top of
    # the data loss.
    total_loss = cross_entropy + tf.add_n(tf.get_collection("losses"))

    num_train_examples = conf.x_train.shape[0]
    print("shape", num_train_examples)

    # Staircase decay: the rate changes once per pass over the training set.
    learning_rate = tf.train.exponential_decay(
        learning_rate=conf.learning_rate_base,
        global_step=global_step,
        decay_steps=int(num_train_examples / conf.batch_size),
        decay_rate=conf.learning_rate_decay,
        staircase=True)

    sgd = tf.train.GradientDescentOptimizer(learning_rate)
    train_op = sgd.minimize(total_loss, global_step=global_step)
def _build_model(self, inputs):
    """Build the twelve-layer convolutional classifier and return its logits.

    Layout, by variable scope:
      * Layer1-2: conv -> batch norm -> ReLU (64 then 16 channels).
      * Layer3-7: residual units with an identity shortcut.
      * Layer8-11: residual units with a strided 1x1-conv shortcut and
        average pooling on the main branch (16, 64, 128, 256 channels).
      * Layer12: two 512-channel convolutions, then a spatial mean.
      * ip: fully connected layer with two outputs.

    Args:
        inputs: Input tensor. It is transposed with ``[0, 3, 1, 2]`` when
            ``self.data_format == 'NCHW'`` and always cast to float32.

    Returns:
        The two-unit output tensor, also stored in ``self.outputs``.
    """
    self.inputs = inputs
    if self.data_format == 'NCHW':
        reduction_axis = [2, 3]
        _inputs = tf.cast(tf.transpose(inputs, [0, 3, 1, 2]), tf.float32)
    else:
        reduction_axis = [1, 2]
        _inputs = tf.cast(inputs, tf.float32)

    def bn_relu(tensor):
        """Batch-normalize ``tensor`` and apply ReLU."""
        return tf.nn.relu(layers.batch_norm(tensor))

    def conv_bn_pair(tensor, **conv_kwargs):
        """conv -> BN -> ReLU -> conv -> BN, the main branch of a unit."""
        hidden = bn_relu(layers.conv2d(tensor, **conv_kwargs))
        return layers.batch_norm(layers.conv2d(hidden, **conv_kwargs))

    def identity_unit(tensor):
        """Residual unit whose shortcut is the unchanged input."""
        return tf.add(tensor, conv_bn_pair(tensor))

    def pooled_unit(tensor, **conv_kwargs):
        """Residual unit with a strided 1x1 shortcut and pooled main branch.

        The shortcut is created first so that layer auto-naming inside the
        enclosing variable scope is unchanged.
        """
        shortcut = layers.conv2d(tensor, kernel_size=1, stride=2,
                                 **conv_kwargs)
        shortcut = layers.batch_norm(shortcut)
        pooled = layers.avg_pool2d(conv_bn_pair(tensor, **conv_kwargs))
        return tf.add(shortcut, pooled)

    conv_scope = arg_scope(
        [layers.conv2d],
        num_outputs=16,
        kernel_size=3,
        stride=1,
        padding='SAME',
        data_format=self.data_format,
        activation_fn=None,
        weights_initializer=layers.variance_scaling_initializer(),
        weights_regularizer=layers.l2_regularizer(2e-4),
        biases_initializer=tf.constant_initializer(0.2),
        biases_regularizer=None)
    bn_scope = arg_scope(
        [layers.batch_norm],
        decay=0.9,
        center=True,
        scale=True,
        updates_collections=None,
        is_training=self.is_training,
        fused=True,
        data_format=self.data_format)
    pool_scope = arg_scope(
        [layers.avg_pool2d],
        kernel_size=[3, 3],
        stride=[2, 2],
        padding='SAME',
        data_format=self.data_format)

    with conv_scope, bn_scope, pool_scope:
        with tf.variable_scope('Layer1'):
            res = bn_relu(layers.conv2d(_inputs, num_outputs=64,
                                        kernel_size=3))
        with tf.variable_scope('Layer2'):
            res = bn_relu(layers.conv2d(res))

        # Layer3 .. Layer7: identity-shortcut residual units.
        for layer_index in range(3, 8):
            with tf.variable_scope('Layer%d' % layer_index):
                res = identity_unit(res)

        # Layer8 keeps the scope's default width; Layer9-11 widen explicitly.
        with tf.variable_scope('Layer8'):
            res = pooled_unit(res)
        for layer_index, width in ((9, 64), (10, 128), (11, 256)):
            with tf.variable_scope('Layer%d' % layer_index):
                res = pooled_unit(res, num_outputs=width)

        with tf.variable_scope('Layer12'):
            bn = conv_bn_pair(res, num_outputs=512)
            # Global average over the spatial axes, keeping rank 4.
            avgp = tf.reduce_mean(bn, reduction_axis, keepdims=True)

    ip = layers.fully_connected(
        layers.flatten(avgp),
        num_outputs=2,
        activation_fn=None,
        normalizer_fn=None,
        weights_initializer=tf.random_normal_initializer(mean=0.,
                                                         stddev=0.01),
        biases_initializer=tf.constant_initializer(0.),
        scope='ip')
    self.outputs = ip
    return self.outputs
def _build_z2_encoder(self, inputs, z1, reuse=False):
    """Build the z2 encoder and return its latent parameters and sample.

    When ``self._model_conf["rec_z2_enc"]`` is truthy the input is cut into
    ``n_frame`` groups of ``n_concur`` time steps, ``z1`` is appended to
    every group and the sequence is fed to a stacked RNN whose final states
    become the features. Otherwise the flattened input is concatenated
    with ``z1``. The features then pass through the fully connected layers
    listed in ``hu_z2_enc`` and a ``dense_latent`` layer.

    Args:
        inputs: Input tensor; the recurrent path transposes it with
            ``(0, 2, 1, 3)``, so it is presumably rank 4 -- TODO confirm.
        z1: Tensor concatenated to the input features.
        reuse: Passed to the variable scope and to every layer.

    Returns:
        ``([z2_mu, z2_logvar], z2)`` as produced by ``dense_latent``.

    Raises:
        ValueError: If the number of time steps is not a multiple of
            ``rec_z2_enc_concur``.
        NotImplementedError: If a bidirectional encoder or a learned
            initial state is requested.
    """
    model_conf = self._model_conf
    weights_regularizer = l2_regularizer(self._train_conf["l2_weight"])

    if model_conf["if_bn"]:
        normalizer_fn = batch_norm
        normalizer_params = {
            "scope": "BatchNorm",
            "is_training": self._feed_dict["is_train"],
            "reuse": reuse
        }
        # TODO: need to upgrade to latest,
        # which commit support param_regularizers args
    else:
        normalizer_fn = None
        normalizer_params = None

    if not hasattr(self, "_debug_outputs"):
        self._debug_outputs = {}
    debug = self._debug_outputs

    _, T, _ = model_conf["target_shape"]
    n_concur = model_conf["rec_z2_enc_concur"]
    if T % n_concur != 0:
        raise ValueError("total time steps must be multiples of %s" %
                         (n_concur))
    n_frame = T // n_concur
    info("z2_encoder: n_frame=%s, n_concur=%s" % (n_frame, n_concur))

    with tf.variable_scope("z2_enc", reuse=reuse):
        if model_conf["rec_z2_enc"]:
            # Recurrent path: reshape to (N, n_frame, n_concur*C*F).
            inputs = array_ops.transpose(inputs, (0, 2, 1, 3))
            static_shape = inputs.get_shape().as_list()
            frame_depth = np.prod(static_shape[2:])
            inputs = tf.reshape(inputs,
                                (-1, n_frame, n_concur * frame_depth))

            # Append z1 to each frame.
            z1_per_frame = tf.tile(tf.expand_dims(z1, 1), (1, n_frame, 1))
            inputs = tf.concat([inputs, z1_per_frame], axis=-1)
            debug["inp_reshape"] = inputs

            if model_conf["rec_z2_enc_bi"]:
                raise NotImplementedError

            cell_cls = _cell_dict[model_conf["rec_cell_type"]]
            cell = MultiRNNCell(
                [cell_cls(hu) for hu in model_conf["rec_z2_enc"]])

            if model_conf["rec_learn_init"]:
                raise NotImplementedError

            # Dynamic batch size taken from the first flattened input.
            dynamic_shapes = tuple(
                array_ops.shape(tensor) for tensor in nest.flatten(inputs))
            batch_size = dynamic_shapes[0][0]
            init_state = cell.zero_state(batch_size,
                                         model_conf["input_dtype"])

            rnn_outputs, final_states = dynamic_rnn(
                cell,
                inputs,
                dtype=model_conf["input_dtype"],
                initial_state=init_state,
                time_major=False,
                scope="z2_enc_%sL_rec" % len(model_conf["rec_z2_enc"]))
            debug["raw_rnn_out"] = rnn_outputs
            debug["raw_rnn_final"] = final_states

            # "last..." keeps only the top layer's final state.
            if model_conf["rec_z2_enc_out"].startswith("last"):
                final_states = final_states[-1:]

            if model_conf["rec_cell_type"] == "lstm":
                # The part after the first "_" names which LSTM state
                # components ("h", "c" or both) are collected.
                wanted = model_conf["rec_z2_enc_out"].split("_")[1]
                outputs = []
                for state in final_states:
                    if "h" in wanted:
                        outputs.append(state.h)
                    if "c" in wanted:
                        outputs.append(state.c)
            else:
                outputs = final_states
            outputs = tf.concat(outputs, axis=-1)
            debug["concat_rnn_out"] = outputs
        else:
            # Non-recurrent path: flatten the input and append z1.
            flat_dim = np.prod(inputs.get_shape().as_list()[1:])
            outputs = tf.concat(
                [tf.reshape(inputs, [-1, flat_dim]), z1], axis=1)

        # Fully connected layers.
        feature_dim = np.prod(outputs.get_shape().as_list()[1:])
        outputs = tf.reshape(outputs, [-1, feature_dim])
        for layer_index, hu in enumerate(model_conf["hu_z2_enc"]):
            outputs = fully_connected(
                inputs=outputs,
                num_outputs=hu,
                activation_fn=nn.relu,
                normalizer_fn=normalizer_fn,
                normalizer_params=normalizer_params,
                weights_regularizer=weights_regularizer,
                reuse=reuse,
                scope="z2_enc_fc%s" % (layer_index + 1))

        z2_mu, z2_logvar, z2 = dense_latent(
            outputs,
            model_conf["n_latent2"],
            logvar_nl=model_conf["z2_logvar_nl"],
            reuse=reuse,
            scope="z2_enc_lat")

    return [z2_mu, z2_logvar], z2
def build_model(mode,
                images,
                labels,
                training_method='baseline',
                num_classes=10,
                depth=10,
                width=4):
    """Build the wide ResNet graph and its accuracy metrics.

    Weights are L2-regularized with strength ``FLAGS.l2`` through the
    regularizer handed to ``WideResNetModel``.

    Args:
        mode: ``'train'`` builds the model in training mode; any other value
            builds it in evaluation mode.
        images: Input tensor passed to ``model.build``.
        labels: Label tensor. It is compared element-wise with the argmax
            predictions and used as ``in_top_k`` targets, so it presumably
            holds int32 class indices of shape (batch size,) -- TODO
            confirm against the caller.
        training_method: ``'baseline'`` builds plain layers; any other value
            selects the ``'threshold'`` pruning method (layers with masks).
        num_classes: The number of distinct labels in the dataset.
        depth: Network depth; ``depth - 4`` must be a multiple of 6.
        width: Width argument forwarded to ``model.build``.

    Returns:
        A tuple ``(global_step, accuracy, top_5_accuracy, logits)``.

    Raises:
        ValueError: If ``depth - 4`` is not a multiple of 6.
    """
    regularizer_term = tf.constant(FLAGS.l2, tf.float32)
    kernel_regularizer = contrib_layers.l2_regularizer(scale=regularizer_term)

    # depth must have the form 6n+4, n being the number of resnet blocks.
    if (depth - 4) % 6 != 0:
        raise ValueError('Depth of ResNet specified not sufficient.')

    is_training = (mode == 'train')

    # 'threshold' would create layers with mask.
    if training_method == 'baseline':
        pruning_method = 'baseline'
    else:
        pruning_method = 'threshold'

    model = WideResNetModel(
        is_training=is_training,
        regularizer=kernel_regularizer,
        data_format='channels_last',
        pruning_method=pruning_method,
        prune_first_layer=FLAGS.prune_first_layer,
        prune_last_layer=FLAGS.prune_last_layer)
    logits = model.build(
        images, depth=depth, width=width, num_classes=num_classes)

    global_step = tf.train.get_or_create_global_step()

    # Top-1 accuracy from the argmax class.
    predictions = tf.cast(tf.argmax(logits, axis=1), tf.int32)
    is_correct = tf.cast(tf.equal(labels, predictions), tf.float32)
    accuracy = tf.reduce_mean(is_correct)

    # Top-5 accuracy straight from the logits.
    in_top_5 = tf.cast(
        tf.nn.in_top_k(predictions=logits, targets=labels, k=5), tf.float32)
    top_5_accuracy = tf.cast(tf.reduce_mean(in_top_5), tf.float32)

    return global_step, accuracy, top_5_accuracy, logits
def forward(self,
            encoder_inputs,
            trainable=True,
            is_training=True,
            reuse=False,
            with_batchnorm=False):
    """Encode ``encoder_inputs`` with a stack of fully connected layers.

    The input is flattened and passed through three leaky-ReLU dense layers
    (600, 300 and 100 units), each followed by dropout, and finally
    projected to ``self.encoded_dim`` units without an activation. Every
    dense layer is L2-regularized with ``self.re_term``.

    Args:
        encoder_inputs: Tensor to encode.
        trainable: Not used by the active code path.
        is_training: Forwarded to every ``tcl.dropout`` call.
        reuse: Whether to reuse the variables of ``self.name_scope``.
        with_batchnorm: Selects the batch-norm variant.
            NOTE(review): that variant only prints ``'here'`` and never
            assigns the result, so the final ``return`` fails when this
            flag is true -- confirm whether it is still needed.

    Returns:
        The encoded tensor with ``self.encoded_dim`` units.
    """
    with tf.variable_scope(self.name_scope, reuse=reuse) as vs:
        if reuse:
            vs.reuse_variables()

        lrelu = VAE.lrelu

        if with_batchnorm:
            # The convolutional batch-norm encoder that used to live here
            # is disabled; only this marker remains.
            print('here')
        else:
            h0 = tcl.flatten(encoder_inputs)

            # (units, scope) of the hidden layers, in order.
            for units, layer_scope in ((600, "fc1"), (300, "fc2"),
                                       (100, "fc3")):
                dense = tcl.fully_connected(
                    h0,
                    units,
                    weights_regularizer=tcl.l2_regularizer(self.re_term),
                    scope=layer_scope,
                    activation_fn=None)
                h0 = lrelu(dense)
                h0 = tcl.dropout(h0, 0.8, is_training=is_training)

            c_h0 = tcl.flatten(h0)
            c_h0 = tcl.fully_connected(
                c_h0,
                self.encoded_dim,
                weights_regularizer=tcl.l2_regularizer(self.re_term),
                scope="c_fc4",
                activation_fn=None)

        return c_h0
is_training = tf.placeholder(tf.bool, shape=(), name='is_training') keep_prob = tf.placeholder(tf.float32, shape=(), name='keep_prob') bn_params = { 'is_training': is_training, 'decay': 0.99, 'updates_collections': None } with tf.name_scope('model'): # Layer 1: Convolutional. Input = 32x32x1. Output = 28x28x6. conv1 = conv2d(inputs=X, num_outputs=6, kernel_size=(5, 5), stride=(1, 1), padding='valid', weights_regularizer=l2_regularizer(scale=reg_constant), normalizer_fn=batch_norm, normalizer_params=bn_params, activation_fn=tf.nn.relu, scope='conv1') # Pooling. Input = 28x28x6. Output = 14x14x6. conv1 = max_pool2d(conv1, kernel_size=(2, 2)) # Convolutional. Output = 10x10x16. conv2 = conv2d(inputs=conv1, num_outputs=16, kernel_size=(5, 5), stride=(1, 1), padding='valid', weights_regularizer=l2_regularizer(scale=reg_constant),
def l2_regularizer(scale=1.0):
    """Return ``contrib_layers.l2_regularizer`` built with ``scale``.

    Args:
        scale: Regularization strength forwarded unchanged.

    Returns:
        Whatever ``contrib_layers.l2_regularizer(scale=scale)`` returns.
    """
    regularizer = contrib_layers.l2_regularizer(scale=scale)
    return regularizer
tf.identity( tf.add( tf.matmul( tf.nn.sigmoid( tf.add( tf.matmul( tf.nn.sigmoid( tf.add( tf.matmul( tf.nn.sigmoid( tf.matmul(X, V1) + mu1), V2), mu2)), V3), mu3)), S3), pi3)), mapping)) / 3), 1, keep_dims=True)) + layers.apply_regularization( layers.l2_regularizer(scale=lambdaR), weights_list=[V1, V2, V3, S1, S2, S3]) optimizer = layers.optimize_loss(loss=loss, global_step=tf.train.get_global_step(), learning_rate=learning_rate, optimizer=tf.train.AdamOptimizer, summaries=[ "learning_rate", "loss", "gradients", "gradient_norm", ]) saver = tf.train.Saver() tf.summary.scalar("loss", loss)
from legacy.input import Input

_mp = MovieQAPath()

# Hyper-parameters shared by the helpers below and by the model code.
hp = {
    'emb_dim': 512,
    'feat_dim': 512,
    'learning_rate': 10**(-4),
    'decay_rate': 0.5,
    'decay_type': 'exp',
    'decay_epoch': 2,
    'opt': 'adam',
    'checkpoint': '',
    'dropout_rate': 0.1
}

reg = layers.l2_regularizer(0.01)


def dropout(x, training):
    """Apply dropout to ``x`` with the rate configured in ``hp``.

    Args:
        x: Tensor to apply dropout to.
        training: Forwarded to ``tf.layers.dropout`` as ``training``.

    Returns:
        The output of ``tf.layers.dropout``.
    """
    rate = hp['dropout_rate']
    return tf.layers.dropout(x, rate, training=training)


def make_mask(x, length):
    """Build a boolean mask from lengths, tiled over the embedding axis.

    Args:
        x: Lengths passed to ``tf.sequence_mask``.
        length: Maximum length (``maxlen``) of the mask.

    Returns:
        The sequence mask with a trailing axis tiled ``hp['emb_dim']`` times.
    """
    valid = tf.sequence_mask(x, maxlen=length)
    expanded = tf.expand_dims(valid, axis=-1)
    return tf.tile(expanded, [1, 1, hp['emb_dim']])


def mask_tensor(x, mask):
    """Replace the entries of ``x`` where ``mask`` is false with zeros.

    NOTE(review): no ``return`` statement is visible for this function, so
    as written the masked tensor is not handed back to the caller. The
    definition may continue beyond this view -- confirm before relying on it.
    """
    zeros = tf.zeros_like(x)
    x = tf.where(mask, x, zeros)
def _build_graph(self, learning_rate, epoch, is_training):
    """Build the classifier graph and return its essential nodes.

    The network is two 5x5 conv + max-pool stages (16 and 32 channels), a
    512-unit fully connected layer and a linear logits layer. The loss is
    either softmax cross-entropy or a scaled multiclass hinge loss, plus the
    collected regularization losses and an optional orthogonality penalty
    on convolution weights. Training uses Adam.

    Args:
        learning_rate: Learning rate handed to ``tf.train.AdamOptimizer``.
        epoch: Not used by this method.
        is_training: Not used by this method.

    Returns:
        An ``AbstractModel.EssentialNodes`` with the input and target
        placeholders, class probabilities, loss, training step and an
        ``'accuracy'`` evaluation node.
    """
    from tensorflow.contrib import layers
    from tf_utils.layers import conv2d, max_pool, rescale_bilinear, avg_pool, bn_relu
    from tf_utils.losses import multiclass_hinge_loss

    def get_ortho_penalty():
        """Sum of squared deviations of each conv kernel's Gram matrix."""
        conv_weights = [
            var for var in tf.contrib.framework.get_variables('')
            if 'conv' in var.name and 'weights' in var.name
        ]
        penalty = tf.constant(0.0)
        for var in conv_weights:
            out_channels = var.shape[3].value
            # One column per output channel.
            flat = tf.reshape(var, (-1, out_channels))
            gram = tf.matmul(flat, flat, True)
            deviation = gram - tf.eye(out_channels) / out_channels
            penalty += tf.reduce_sum(deviation**2)
        return penalty

    # Input image and labels placeholders
    input_node = tf.placeholder(tf.float32,
                                shape=[None] + list(self.input_shape),
                                name='input')
    target = tf.placeholder(tf.float32,
                            shape=[None, self.class_count],
                            name='target')

    # L2 regularization
    weight_decay = tf.constant(self.weight_decay, dtype=tf.float32)

    # Hidden layers
    h = input_node
    with tf.contrib.framework.arg_scope(
            [layers.conv2d],
            kernel_size=5,
            data_format='NHWC',
            padding='SAME',
            activation_fn=tf.nn.relu,
            weights_initializer=layers.variance_scaling_initializer(),
            weights_regularizer=layers.l2_regularizer(weight_decay)):
        for stage, channels in enumerate((16, 32), 1):
            h = layers.conv2d(h, channels, scope='convrelu%d' % stage)
            h = layers.max_pool2d(h, 2, 2, scope='pool%d' % stage)

    with tf.contrib.framework.arg_scope(
            [layers.fully_connected],
            activation_fn=tf.nn.relu,
            weights_initializer=layers.variance_scaling_initializer(),
            weights_regularizer=layers.l2_regularizer(weight_decay)):
        h = layers.flatten(h, scope='flatten3')
        h = layers.fully_connected(h, 512, scope='fc3')

    self._print_vars()

    # Softmax classification
    logits = layers.fully_connected(h,
                                    self.class_count,
                                    activation_fn=None,
                                    scope='logits')
    probs = tf.nn.softmax(logits, name='probs')

    # Loss
    if self.use_multiclass_hinge_loss:
        loss = 0.1 * multiclass_hinge_loss(target, logits)
    else:
        loss = tf.losses.softmax_cross_entropy(target, logits)
    regularization_losses = tf.get_collection(
        tf.GraphKeys.REGULARIZATION_LOSSES)
    loss = loss + tf.reduce_sum(regularization_losses)
    if self.ortho_penalty > 0:
        loss += self.ortho_penalty * get_ortho_penalty()

    # Optimization
    training_step = tf.train.AdamOptimizer(learning_rate).minimize(loss)

    # Dense predictions and labels
    preds = tf.argmax(probs, 1)
    dense_labels = tf.argmax(target, 1)

    # Other evaluation measures
    hits = tf.cast(tf.equal(preds, dense_labels), tf.float32)
    accuracy = tf.reduce_mean(hits)

    return AbstractModel.EssentialNodes(input=input_node,
                                        target=target,
                                        probs=probs,
                                        loss=loss,
                                        training_step=training_step,
                                        evaluation={'accuracy': accuracy})
def __init__(self, is_training, config):
    """Build the sentence-pair classification graph.

    Pipeline: embedding lookup -> bidirectional RNN encoding of both
    sentences -> dot-product attention -> comparison features (difference
    and product) -> second bidirectional RNN -> max/average pooling ->
    dense layer -> softmax prediction. When ``is_training`` is true an
    Adam training op with global-norm gradient clipping is added.

    Args:
        is_training: Enables dropout (when ``config.keep_prob < 1``) and
            the construction of the training op.
        config: Object providing ``batch_size``, ``xmaxlen``, ``ymaxlen``,
            ``num_classes``, ``hidden_units``, ``keep_prob``,
            ``l2_strength``, ``max_grad_norm`` and ``learning_rate``.
    """
    self.config = config
    self.is_training = is_training
    self.global_step = tf.Variable(0, trainable=False)

    # Token ids and masks; the masks are cast to float right away.
    self.x = tf.placeholder(tf.int32,
                            [self.config.batch_size, self.config.xmaxlen])
    self.y = tf.placeholder(tf.int32,
                            [self.config.batch_size, self.config.ymaxlen])
    self.x_mask = tf.placeholder(
        tf.int32, [self.config.batch_size, self.config.xmaxlen])
    self.y_mask = tf.placeholder(
        tf.int32, [self.config.batch_size, self.config.ymaxlen])
    self.x_mask = tf.cast(self.x_mask, tf.float32)
    self.y_mask = tf.cast(self.y_mask, tf.float32)

    # True sentence lengths, used as the divisor for average pooling.
    self.x_len = tf.placeholder(tf.int32, [self.config.batch_size, ])
    self.y_len = tf.placeholder(tf.int32, [self.config.batch_size, ])
    self.x_len = tf.cast(self.x_len, tf.float32)
    self.y_len = tf.cast(self.y_len, tf.float32)

    self.label = tf.placeholder(
        tf.int32, [self.config.batch_size, self.config.num_classes])

    with tf.device("/cpu:0"):
        embedding_matrix = np.load("../data/glove/snli_glove.npy")
        # embedding_matrix = np.load(self.config.glove_dir)
        embedding = tf.Variable(embedding_matrix,
                                trainable=False,
                                name="embedding")
        input_xemb = tf.nn.embedding_lookup(embedding, self.x)
        input_yemb = tf.nn.embedding_lookup(embedding, self.y)

    if is_training and config.keep_prob < 1:
        input_xemb = tf.nn.dropout(input_xemb, config.keep_prob)
        input_yemb = tf.nn.dropout(input_yemb, config.keep_prob)

    with tf.variable_scope("encode_x"):
        (self.x_output_fw, self.x_output_bw, self.x_state_fw,
         self.x_state_bw) = self.my_bidirectional_dynamic_rnn(
             input_xemb, self.x_mask)
        self.x_output = tf.concat([self.x_output_fw, self.x_output_bw], 2)

    with tf.variable_scope("encode_y"):
        (self.y_output_fw, self.y_output_bw, self.y_state_fw,
         self.y_state_bw) = self.my_bidirectional_dynamic_rnn(
             input_yemb, self.y_mask)
        self.y_output = tf.concat([self.y_output_fw, self.y_output_bw], 2)

    with tf.variable_scope("dot-product-atten"):
        # weighted_y: (b, x_len, 2*h), weighted_x: (b, y_len, 2*h)
        self.weighted_y, self.weighted_x = self.dot_product_attention(
            x_sen=self.x_output,
            y_sen=self.y_output,
            x_len=self.config.xmaxlen,
            y_len=self.config.ymaxlen)

    with tf.variable_scope("collect-info"):
        # Element-wise difference and product of each sentence with its
        # attention-weighted counterpart.
        diff_xy = tf.subtract(self.x_output, self.weighted_y)
        diff_yx = tf.subtract(self.y_output, self.weighted_x)
        mul_xy = tf.multiply(self.x_output, self.weighted_y)
        mul_yx = tf.multiply(self.y_output, self.weighted_x)

        m_xy = tf.concat(
            [self.x_output, self.weighted_y, diff_xy, mul_xy],
            axis=2)  # (b, x_len, 8*h)
        m_yx = tf.concat(
            [self.y_output, self.weighted_x, diff_yx, mul_yx],
            axis=2)  # (b, y_len, 8*h)

        m_xy = self.tensordot(inp=m_xy,
                              out_dim=self.config.hidden_units,
                              activation=tf.nn.relu,
                              use_bias=True,
                              w_name="fnn-mxy_W")
        m_yx = self.tensordot(inp=m_yx,
                              out_dim=self.config.hidden_units,
                              activation=tf.nn.relu,
                              use_bias=True,
                              w_name="fnn-myx_W")

        if is_training and config.keep_prob < 1:
            m_xy = tf.nn.dropout(m_xy, config.keep_prob)
            m_yx = tf.nn.dropout(m_yx, config.keep_prob)

    with tf.variable_scope("composition"):
        with tf.variable_scope("encode_mxy"):
            mxy_output_fw, mxy_output_bw, _, _ = \
                self.my_bidirectional_dynamic_rnn(m_xy, self.x_mask)
            mxy_output = tf.concat([mxy_output_fw, mxy_output_bw],
                                   2)  # (b, xmaxlen, 2*h)
        with tf.variable_scope("encode_myx"):
            myx_output_fw, myx_output_bw, _, _ = \
                self.my_bidirectional_dynamic_rnn(m_yx, self.y_mask)
            myx_output = tf.concat([myx_output_fw, myx_output_bw],
                                   2)  # (b, ymaxlen, 2*h)

    with tf.variable_scope("pooling"):
        # Pooling removes the time axis; averages divide by true length.
        v_xymax = tf.reduce_max(mxy_output, axis=1)  # (b, 2*h)
        v_xy_sum = tf.reduce_sum(mxy_output, 1)  # (b, 2*h)
        v_xyave = tf.div(v_xy_sum, tf.expand_dims(self.x_len, -1))

        v_yxmax = tf.reduce_max(myx_output, axis=1)  # (b, 2*h)
        v_yx_sum = tf.reduce_sum(myx_output, 1)  # (b, 2*h)
        v_yxave = tf.div(v_yx_sum, tf.expand_dims(self.y_len, -1))

        self.v = tf.concat([v_xyave, v_xymax, v_yxmax, v_yxave],
                           axis=-1)  # (b, 8*h)
        if is_training and config.keep_prob < 1:
            self.v = tf.nn.dropout(self.v, config.keep_prob)

    with tf.variable_scope("pred-layer"):
        fnn1 = self.fnn(input=self.v,
                        out_dim=self.config.hidden_units,
                        activation=tf.nn.tanh,
                        use_bias=True,
                        w_name="fnn-pred-W")
        if is_training and config.keep_prob < 1:
            fnn1 = tf.nn.dropout(fnn1, config.keep_prob)

        # The output width follows config.num_classes so that it always
        # matches the label placeholder; it used to be hard-coded to 3.
        W_pred = tf.get_variable(
            "W_pred",
            shape=[self.config.hidden_units, self.config.num_classes],
            regularizer=l2_regularizer(self.config.l2_strength))
        self.pred = tf.nn.softmax(tf.matmul(fnn1, W_pred), name="pred")

    correct = tf.equal(tf.argmax(self.pred, 1), tf.argmax(self.label, 1))
    self.acc = tf.reduce_mean(tf.cast(correct, "float"), name="accuracy")

    # Cross-entropy summed over the batch, plus collected L2 terms.
    self.loss_term = -tf.reduce_sum(
        tf.cast(self.label, tf.float32) * tf.log(self.pred),
        name="loss_term")
    self.reg_term = tf.reduce_sum(
        tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES),
        name="reg_term")
    self.loss = tf.add(self.loss_term, self.reg_term, name="loss")

    # Evaluation graphs stop here; no optimizer is built.
    if not is_training:
        return

    with tf.variable_scope("bp_layer"):
        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(self.loss, tvars),
                                          config.max_grad_norm)
        optimizer = tf.train.AdamOptimizer(config.learning_rate)
        self.optim = optimizer.apply_gradients(
            zip(grads, tvars), global_step=self.global_step)

    _ = tf.summary.scalar("loss", self.loss)
def resnn(self, sequences):
    """Build the residual network and return the classification logits.

    The graph is: expand the input with a singleton axis, a first
    convolution with batch norm + ReLU, max pooling, the residual groups
    (with optional group-level and network-level shortcut connections),
    global average pooling, optional dropout and a linear output layer.

    Args:
        sequences: Input tensor. A singleton axis is inserted at position 2,
            giving [batch_size, html_len, 1, we_dim] according to the
            original comment -- TODO confirm the input layout.

    Returns:
        Logits with ``self.num_cats`` units per example.
    """
    self.model_conf()

    # [batch_size, html_len, 1, we_dim]
    expanded = tf.expand_dims(sequences, 2)

    # First convolution
    with tf.variable_scope('conv_layer1'):
        net = self.conv1d(expanded, self.groups[0].num_ker, 7, 2)
        net = self.BN_ReLU(net)

    # Max pool
    net = tf.nn.max_pool(net, [1, 3, 1, 1],
                         strides=[1, 2, 1, 1],
                         padding='SAME')

    # Input of the network-level shortcut, taken before any residual unit.
    if self.ror_l1:
        net_l1 = net

    # Stacking residual units
    for group_i, group in enumerate(self.groups):
        if self.ror_l2:
            net_l2 = net

        for unit_i in range(group.num_units):
            net = self.residual_unit(net, group_i, unit_i)

        if self.ror_l2:
            # this is necessary to prevent loss exploding
            net_l2 = self.BN_ReLU(net_l2)
            net_l2 = self.conv1d(net_l2, self.groups[group_i].num_ker,
                                 self.bott_size13, 2)
            net = net + net_l2

    if self.ror_l1:
        # The shortcut stride is 2 ** (number of groups).
        net_l1 = self.BN_ReLU(net_l1)
        net_l1 = self.conv1d(net_l1, self.groups[-1].num_ker,
                             self.bott_size13, 2**len(self.groups))
        net = net + net_l1

    # an extra activation before average pooling
    if self.special_first:
        with tf.variable_scope('special_BN_ReLU'):
            net = self.BN_ReLU(net)

    # Global average pooling: padding must be VALID.
    # output: batch*1*1*channels
    _, height, width_, _ = net.get_shape().as_list()
    net = tf.nn.avg_pool(net,
                         ksize=[1, height, width_, 1],
                         strides=[1, 1, 1, 1],
                         padding='VALID')

    pooled_shape = net.get_shape().as_list()
    softmax_len = pooled_shape[1] * pooled_shape[2] * pooled_shape[3]
    net = tf.reshape(net, [-1, softmax_len])

    # add dropout
    if self.dropout:
        with tf.name_scope("dropout"):
            net = tf.nn.dropout(net, self.dropout_keep_prob)

    # Fully connected output layer
    with tf.variable_scope('FC-layer'):
        net = fully_connected(
            net,
            num_outputs=self.num_cats,
            activation_fn=None,
            normalizer_fn=None,
            weights_initializer=variance_scaling_initializer(),
            weights_regularizer=l2_regularizer(self.weight_decay),
            biases_initializer=tf.zeros_initializer,
        )

    return net
def build_q_network(self, hiddens):
    """Build the Q-value network on top of ``self._inputs``.

    Each entry of ``hiddens`` adds a tanh fully connected layer (L2 weight
    regularisation, scale 0.1) followed by dropout with ``self.keep_prob``.
    A final linear layer with ``self.num_actions`` outputs produces the
    Q-values.

    Args:
        hiddens: Iterable of layer widths, one per hidden layer.

    Side effects:
        Sets ``self.Q_t`` (output of the final linear layer) and
        ``self.Q_action`` (argmax of ``self.Q_t`` along axis 1).
    """
    net = self._inputs
    for num_units in hiddens:
        net = layers.fully_connected(
            inputs=net,
            num_outputs=num_units,
            activation_fn=tf.tanh,
            weights_regularizer=layers.l2_regularizer(scale=0.1))
        net = tf.nn.dropout(net, self.keep_prob)

    self.Q_t = layers.fully_connected(
        net, self.num_actions, activation_fn=None)
    # `dimension` is a deprecated alias of `axis` in tf.argmax.
    self.Q_action = tf.argmax(self.Q_t, axis=1)
#!/usr/bin/env python
def __init__(self, data, training=False):
    """Build the question / subtitle / answer scoring graph.

    Shape comments such as ``(N, L_s, E_t)`` are carried over from the
    original author's annotations.

    Args:
        data: Input object; its ``ques``, ``ans``, ``subt``, ``ql``, ``al``
            and ``sl`` attributes are read here.
        training: Forwarded to every ``dropout`` call.

    Side effects:
        Creates the TensorFlow variables and ops and stores intermediate
        tensors as attributes; the final scores are in ``self.output``.
    """
    self.data = data
    reg = l2_regularizer(0.1)

    q_mask = make_mask(self.data.ql, 25)  # (1, L_q, E)
    s_mask = make_mask(self.data.sl, 29)  # (N, L_s, E)
    a_mask = make_mask(self.data.al, 34)  # (5, L_a, E)

    # NOTE(review): only `s_mask` is read below; the other masks and the
    # three shape tensors are unused but kept so the set of created ops is
    # unchanged.
    ques_shape = tf.shape(q_mask)
    subt_shape = tf.shape(s_mask)
    ans_shape = tf.shape(a_mask)

    with tf.variable_scope('Embedding'):
        # Frozen embedding matrix loaded from disk.
        self.embedding = tf.get_variable(
            'embedding_matrix',
            initializer=np.load(_mp.embedding_file),
            trainable=False)

        self.ques = tf.nn.embedding_lookup(self.embedding, self.data.ques)  # (1, L_q, E)
        self.ans = tf.nn.embedding_lookup(self.embedding, self.data.ans)  # (5, L_a, E)
        self.subt = tf.nn.embedding_lookup(self.embedding, self.data.subt)  # (N, L_s, E)

        self.ques = dropout(self.ques, training=training)  # (1, L_q, E)
        self.ans = dropout(self.ans, training=training)  # (5, L_a, E)
        self.subt = dropout(self.subt, training=training)  # (N, L_s, E)

    with tf.variable_scope('Embedding_Linear', regularizer=reg):
        # The second and third dense calls pass reuse=True and no name.
        self.ques_embedding = dropout(
            tf.layers.dense(self.ques, hp['emb_dim'],
                            activation=tf.nn.tanh, use_bias=False),
            training)  # (1, L_q, E_t)
        self.ans_embedding = dropout(
            tf.layers.dense(self.ans, hp['emb_dim'],
                            activation=tf.nn.tanh, use_bias=False,
                            reuse=True),
            training)  # (5, L_a, E_t)
        self.subt_embedding = dropout(
            tf.layers.dense(self.subt, hp['emb_dim'],
                            activation=tf.nn.tanh, use_bias=False,
                            reuse=True),
            training)  # (N, L_s, E_t)

    with tf.variable_scope('Language_Encode', regularizer=reg):
        self.ques_enc = conv_encode(
            self.ques_embedding, self.data.ql, 'question')  # (1, 1, E_t)
        self.subt_enc = conv_encode(
            self.subt_embedding, self.data.sl, 'subtitle')  # (N, 1, E_t)
        self.ans_enc = conv_encode(
            self.ans_embedding, self.data.al, 'answer')  # (5, 1, E_t)

    with tf.variable_scope('Language_Attention', regularizer=reg):
        shape = tf.shape(self.subt_embedding)

        # Tile the question encoding over the subtitle tensor and zero it
        # wherever `s_mask` is False.
        q = tf.tile(self.ques_enc, [shape[0], shape[1], 1])  # (N, L_s, E_t)
        q = tf.where(s_mask, q,
                     tf.zeros_like(self.subt_embedding))  # (N, L_s, E_t)
        self.sq_concat = tf.concat(
            [self.subt_embedding, q], axis=-1)  # (N, L_s, 2 * E_t)

        self.lang_attn = tf.layers.conv1d(
            self.sq_concat, filters=hp['feat_dim'], kernel_size=3,
            padding='same', activation=tf.nn.relu)  # (N, L_s, E_t)
        self.lang_attn = tf.layers.conv1d(
            self.lang_attn, filters=1, kernel_size=5, padding='same',
            dilation_rate=2, activation=None)  # (N, L_s, 1)
        self.lang_attn = tf.nn.softmax(self.lang_attn, axis=1)  # (N, L_s, 1)

        self.subt_attn_enc = safe_mean(
            self.subt_embedding * (1 + self.lang_attn),
            self.data.sl)  # (N, 1, E_t)

        # Sigmoid gate computed from the question encoding; blends the
        # plain and the attended subtitle encodings.
        alpha = tf.layers.dense(
            self.ques_enc, 1, activation=tf.nn.sigmoid)  # (1, 1, 1)
        self.subt_sum = (alpha * self.subt_enc
                         + (1 - alpha) * self.subt_attn_enc)  # (N, 1, E_t)

    with tf.variable_scope('Temporal_Attention', regularizer=reg):
        self.vs_concat = tf.transpose(
            tf.concat(
                [self.subt_sum, tf.tile(self.ques_enc, [shape[0], 1, 1])],
                axis=-1),
            [1, 0, 2])  # (1, N, 2 * E_t)

        self.temp_attn = tf.layers.conv1d(
            self.vs_concat, filters=hp['feat_dim'], kernel_size=5,
            padding='same', activation=tf.nn.relu)  # (1, N, E_t)
        # Floor division keeps the filter count an int under Python 3,
        # where `/` would produce a float.
        self.temp_attn = tf.layers.conv1d(
            self.temp_attn, filters=hp['feat_dim'] // 4, kernel_size=7,
            dilation_rate=2, padding='same',
            activation=tf.nn.relu)  # (1, N, E_t / 4)

        # Two separately parameterised heads over the same features.
        self.focus1 = tf.layers.conv1d(
            self.temp_attn, filters=1, kernel_size=9, dilation_rate=3,
            padding='same', activation=None)  # (1, N, 1)
        self.focus2 = tf.layers.conv1d(
            self.temp_attn, filters=1, kernel_size=9, dilation_rate=3,
            padding='same', activation=None)  # (1, N, 1)

        self.subt_temp1 = (tf.transpose(self.subt_sum, [1, 0, 2])
                           * tf.nn.softmax(self.focus1, axis=1))  # (1, N, E_t)
        self.subt_temp2 = (tf.transpose(self.subt_sum, [1, 0, 2])
                           * tf.nn.softmax(self.focus2, axis=1))  # (1, N, E_t)

    with tf.variable_scope('Answer', regularizer=reg):
        beta = tf.layers.dense(
            self.ques_enc, 1, activation=tf.nn.sigmoid)  # (1, 1, 1)
        self.summarize = tf.reduce_sum(
            beta * self.subt_temp1 + (1 - beta) * self.subt_temp2,
            axis=1)  # (1, E_t)

        # gamma starts at zero, so sigmoid(gamma) is 0.5 at initialisation.
        gamma = tf.get_variable('gamma', [1, 1],
                                initializer=tf.zeros_initializer)
        self.ans_vec = self.summarize * tf.nn.sigmoid(gamma) + \
            tf.squeeze(self.ques_enc, axis=0) * (1 - tf.nn.sigmoid(gamma))  # (1, E_t)

        # Sum over axis 1 of ans_vec times each answer encoding.
        self.output = tf.transpose(
            tf.reduce_sum(self.ans_vec * tf.squeeze(self.ans_enc),
                          axis=1, keepdims=True))  # (1, 5)
def embedding_cnn_model_projection_distance(sen_dim, vocab_dim, word_dim):
    """Build a siamese sentence-pair model with a shared conv encoder.

    The input placeholder holds two token-id sequences per example. Both
    are embedded with one table, encoded by the same two conv1d layers
    (weights shared by layer name), summed over the sequence axis,
    concatenated, and passed through three dense layers.

    The loss is the MSE against the label plus three penalties: the squared
    norm of embedding row 0, 0.1 times the mean squared deviation of the
    other rows' squared norms from 1, and 0.007 times the collected
    regularisation losses.

    Args:
        sen_dim: Length of each token sequence.
        vocab_dim: Number of rows in the embedding table.
        word_dim: Embedding dimension.

    Returns:
        Dict with keys ``'x'`` (input placeholder), ``'y'`` (label
        placeholder), ``'loss'``, ``'pred'`` and ``'opt'`` (Adam minimise
        op).
    """

    def _encode(embeddings, reuse):
        """Encode one branch with the shared convs plus residual sum."""
        embeddings = tf.reshape(embeddings, [-1, sen_dim, word_dim])
        conv = tf.layers.conv1d(
            embeddings,
            filters=40,
            kernel_size=3,
            strides=1,
            activation=tf.nn.relu,
            kernel_regularizer=l2_regularizer(0.5),
            bias_regularizer=l2_regularizer(0.5),
            kernel_initializer=tf.glorot_normal_initializer(),
            bias_initializer=tf.glorot_normal_initializer(),
            padding='SAME',
            name='conv_1d',
            reuse=reuse)
        residual = tf.layers.conv1d(
            conv,
            filters=40,
            kernel_size=3,
            strides=1,
            activation=tf.nn.relu,
            kernel_regularizer=l2_regularizer(0.5),
            bias_regularizer=l2_regularizer(0.5),
            kernel_initializer=tf.glorot_normal_initializer(),
            bias_initializer=tf.glorot_normal_initializer(),
            padding='SAME',
            name='conv_1d_2',
            reuse=reuse)
        # Skip connection: first conv output plus second conv output.
        # The original left branch added the residual to itself, which
        # made the two branches compute different functions.
        return tf.reduce_sum(conv + residual, axis=1)

    x = tf.placeholder(dtype=tf.int32, shape=[None, 2, sen_dim], name="input")
    y = tf.placeholder(dtype=tf.float32, shape=[None, 2], name="label")

    intialembedding = tf.random_normal(
        shape=[vocab_dim, word_dim], mean=0.0, stddev=0.1)
    word_vector_table = tf.Variable(intialembedding, name='word_vector')
    tf.summary.histogram(name='embedding', values=word_vector_table)

    # Penalise the squared norm of row 0 of the table.
    no_info_vector = tf.slice(word_vector_table, [0, 0], [1, -1])
    loss_no_info = tf.reduce_sum(no_info_vector * no_info_vector)

    # Penalise deviation of the other rows' squared norms from 1.
    norm_word_embedding = tf.slice(
        word_vector_table, [1, 0], [vocab_dim - 1, word_dim])
    norm_sub = tf.reduce_sum(
        norm_word_embedding * norm_word_embedding, 1) - 1.0
    loss_norm = tf.reduce_mean(norm_sub * norm_sub)

    all_embedding = tf.nn.embedding_lookup(word_vector_table, x)
    left_embeddings = tf.slice(all_embedding, [0, 0, 0, 0], [-1, 1, -1, -1])
    right_embeddings = tf.slice(all_embedding, [0, 1, 0, 0], [-1, 1, -1, -1])

    # The left branch creates the conv variables, the right one reuses them.
    left_embedding = _encode(left_embeddings, reuse=None)
    right_embedding = _encode(right_embeddings, reuse=True)

    concat_embedding = tf.concat([left_embedding, right_embedding], axis=1)
    dense_1 = tf.layers.dense(concat_embedding, units=50, activation=tf.nn.relu)
    dense_2 = tf.layers.dense(dense_1, units=10, activation=tf.nn.relu)
    regulizer_loss = tf.reduce_sum(
        tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
    pred = tf.layers.dense(dense_2, units=2, activation=tf.nn.sigmoid)

    loss = tf.losses.mean_squared_error(
        y, pred) + loss_no_info + 0.1 * loss_norm + 0.007 * regulizer_loss
    optimizer = tf.train.AdamOptimizer(learning_rate=0.001).minimize(loss)
    return {'x': x, 'y': y, 'loss': loss, 'pred': pred, 'opt': optimizer}
def f_net(inputs, num_outputs, is_training):
    """Build a four-stage conv/max-pool network with a dense head.

    The input is rescaled with ``inputs / 128 - 1.0``, passed through four
    conv2d + max_pooling2d stages, flattened, and fed through two
    256-unit ReLU dense layers and a linear output layer. All conv and
    dense kernels carry an L2 regulariser with scale 1e-2.

    Args:
        inputs: Input tensor for the first convolution.
        num_outputs: Number of units in the final dense layer.
        is_training: Not used by this function.

    Returns:
        The output layer's tensor after ``tf.squeeze`` (every size-1
        dimension is removed).
    """
    net = inputs / 128 - 1.0

    # (conv name, filters, kernel size, pool name, pool stride)
    stages = (
        ('conv1', 16, (8, 8), 'pool1', 4),
        ('conv2', 16, (5, 5), 'pool2', 3),
        ('conv3', 64, (3, 3), 'pool3', 8),
        ('conv4', 64, (3, 3), 'pool4', 8),
    )
    for conv_name, num_filters, kernel, pool_name, pool_stride in stages:
        net = layers.conv2d(inputs=net,
                            filters=num_filters,
                            kernel_size=kernel,
                            strides=1,
                            kernel_regularizer=l2_regularizer(scale=1e-2),
                            name=conv_name)
        net = layers.max_pooling2d(inputs=net,
                                   pool_size=3,
                                   strides=pool_stride,
                                   name=pool_name)

    # Flatten everything except the leading dimension.
    flat_size = net.get_shape()[1:].num_elements()
    net = tf.reshape(net, shape=[-1, flat_size])

    for dense_name in ('hid1', 'hid2'):
        net = layers.dense(inputs=net,
                           units=256,
                           activation=tf.nn.relu,
                           kernel_regularizer=l2_regularizer(scale=1e-2),
                           name=dense_name)

    q_values = layers.dense(inputs=net,
                            units=num_outputs,
                            activation=None,
                            kernel_regularizer=l2_regularizer(scale=1e-2),
                            name='q')
    return tf.squeeze(q_values, name='out_sqz')
def direct2pred(self, arg1):
    '''
    Project ``arg1`` to class probabilities: softmax(arg1 x W).

    args:
        arg1: a 2D tensor of shape (batch_size, hidden_units)
    return:
        pred: softmax over the 3 output columns of W
    '''
    with tf.variable_scope("direct2predict_layer"):
        # NOTE(review): the output width is hard-coded to 3 here, while
        # weighted2pred uses self.config.num_classes; confirm the two are
        # meant to agree.
        projection = tf.get_variable(
            "concat_W_pred",
            shape=[self.config.hidden_units, 3],
            regularizer=l2_regularizer(self.config.l2_strength))
        scores = tf.matmul(arg1, projection)
        pred = tf.nn.softmax(scores, name="pred")
    return pred
def test_downsampling(self):
    """
    Build a DownSamplingConnection forward pass and check the output
    shape, the regularization losses, the trainable variables, and that a
    second forward pass with tf.AUTO_REUSE creates no new variables.
    """
    height, width = 32, 30
    num_features = 16
    batch_size = 3
    num_output_features = 8
    specs = [[8, 1], [64, 1], [32, 1], [num_output_features, 1]]

    # Create the graph.
    name = 'downsampling_connection'
    connection = DownSamplingConnection(
        name, specs, regularizer=l2_regularizer(1e-4))
    features_ph = tf.placeholder(
        shape=[None, height, width, num_features], dtype=tf.float32)
    output = connection.get_forward(features_ph)

    # Input is all ones except for the last 4 rows and last 3 columns.
    features = np.zeros(
        shape=[batch_size, height, width, num_features], dtype=np.float32)
    features[:, 0:height - 4, 0:width - 3, :] = 1.0

    self.sess.run(tf.global_variables_initializer())
    results = self.sess.run([output], feed_dict={features_ph: features})
    self.assertEqual(len(results), 1)

    # The spatial dimensions are expected to be halved.
    output_np = results[0]
    expected_shape = np.asarray(
        [batch_size, height / 2, width / 2, num_output_features])
    self.assertTrue(np.allclose(output_np.shape, expected_shape))
    self.assertNotEqual(np.sum(output_np), 0.0)

    # Test regularization losses.
    # len(specs) conv layers x2 (bias and kernels).
    reg_losses = tf.losses.get_regularization_losses(scope=name)
    self.assertEqual(len(reg_losses), 2 * len(specs))

    # Make sure the reg losses aren't 0.
    reg_loss_total = self.sess.run(tf.add_n(reg_losses))
    self.assertNotEqual(reg_loss_total, 0.0)
    self.assertEqual(
        reg_losses[0].name,
        name + '/conv_0/kernel/Regularizer/l2_regularizer:0')

    # Test that we have all the trainable variables.
    trainable_vars = tf.trainable_variables(scope=name)
    self.assertEqual(len(trainable_vars), 2 * len(specs))
    self.assertEqual(trainable_vars[2].name, name + '/conv_1/kernel:0')

    # Getting forward again with tf.AUTO_REUSE must not increase the
    # number of variables.
    vars_before = len(tf.trainable_variables())
    connection.get_forward(features_ph, reuse_variables=tf.AUTO_REUSE)
    self.assertEqual(vars_before, len(tf.trainable_variables()))
def __init__(self, wd, resnet_size, bottleneck, num_classes, num_filters,
             kernel_size, conv_stride, first_pool_size, first_pool_stride,
             block_sizes, block_strides, feature_dim,
             resnet_version=DEFAULT_VERSION, data_format=None,
             dtype=DEFAULT_DTYPE):
    """Creates a model for classifying an image.

    Args:
      wd: The coefficient of weight decay (scale of the L2 regularizer).
      resnet_size: A single integer for the size of the ResNet model.
      bottleneck: Use regular blocks or bottleneck blocks.
      num_classes: The number of classes used as labels.
      num_filters: The number of filters to use for the first block layer
        of the model. This number is then doubled for each subsequent
        block layer.
      kernel_size: The kernel size to use for convolution.
      conv_stride: Stride size for the initial convolutional layer.
      first_pool_size: Pool size to be used for the first pooling layer.
        If none, the first pooling layer is skipped.
      first_pool_stride: Stride size for the first pooling layer. Not used
        if first_pool_size is None.
      block_sizes: A list containing n values, where n is the number of
        sets of block layers desired. Each value should be the number of
        blocks in the i-th set.
      block_strides: List of integers representing the desired stride size
        for each of the sets of block layers. Should be same length as
        block_sizes.
      feature_dim: The dimension of the representation space.
      resnet_version: Integer representing which version of the ResNet
        network to use. Valid values: [1, 2].
      data_format: Input format ('channels_last', 'channels_first', or
        None). If falsy, 'channels_first' is chosen when TensorFlow is
        built with CUDA and 'channels_last' otherwise.
      dtype: The TensorFlow dtype to use for calculations; must be one of
        ALLOWED_TYPES.

    Raises:
      ValueError: If resnet_version is not 1 or 2, or dtype is not in
        ALLOWED_TYPES.
    """
    self.resnet_size = resnet_size

    if not data_format:
        if tf.test.is_built_with_cuda():
            data_format = 'channels_first'
        else:
            data_format = 'channels_last'

    self.resnet_version = resnet_version
    if resnet_version not in (1, 2):
        raise ValueError(
            'Resnet version should be 1 or 2. See README for citations.')

    # Pick the block implementation from (bottleneck, version).
    self.bottleneck = bottleneck
    is_v1 = resnet_version == 1
    if bottleneck:
        self.block_fn = (_bottleneck_block_v1 if is_v1
                         else _bottleneck_block_v2)
    else:
        self.block_fn = (_building_block_v1 if is_v1
                         else _building_block_v2)

    if dtype not in ALLOWED_TYPES:
        raise ValueError('dtype must be one of: {}'.format(ALLOWED_TYPES))

    # Plain configuration values.
    self.data_format = data_format
    self.num_classes = num_classes
    self.num_filters = num_filters
    self.kernel_size = kernel_size
    self.conv_stride = conv_stride
    self.first_pool_size = first_pool_size
    self.first_pool_stride = first_pool_stride
    self.block_sizes = block_sizes
    self.block_strides = block_strides
    self.dtype = dtype

    # Derived settings.
    self.pre_activation = resnet_version == 2
    self.regularizer = contrib_layers.l2_regularizer(scale=wd)
    self.initializer = contrib_layers.xavier_initializer()
    self.drop_rate = 0.5
    self.feature_dim = feature_dim
def build(self, sequence_length, spatial_fully_connected_size,
          temporal_fully_connected_layers, labels, positive_class_weight,
          learning_rate, weight_decay=0.0, is_training=False):
    """Build the temporal classifier over a sliding window of features.

    A non-trainable variable holds the last ``sequence_length`` feature
    rows. ``self.add_sequence_new_op`` drops the oldest row and appends
    ``self.sequence_new``. The flattened window goes through the fully
    connected layers in ``temporal_fully_connected_layers`` and a
    two-way logits layer.

    Args:
        sequence_length: Number of rows kept in the window.
        spatial_fully_connected_size: Width of each feature row.
        temporal_fully_connected_layers: Iterable of hidden layer widths.
        labels: Class indices, one-hot encoded with depth 2 for the loss.
        positive_class_weight: Not used by this function.
            NOTE(review): the class weights are the hard-coded constant
            [0.001, 0.999]; confirm whether this argument was meant to
            drive them.
        learning_rate: Learning rate of the Adam optimizer.
        weight_decay: Scale of the L2 weight regularizers.
        is_training: Passed to batch norm; when true, the optimizer and
            the sequence-gradient ops are also created.

    Side effects:
        Sets ``self.sequence_new``, ``self.sequence``,
        ``self.sequence_gradient``, the two window-shift ops,
        ``self.logits``, ``self.loss`` and ``self.labels``. When
        ``is_training`` it also sets ``self.trainer``, ``self.train_op``,
        ``self.sequence_gradient_new`` and
        ``self.add_sequence_gradient_new``.
    """
    bn_params = {
        'decay': 0.999,
        'center': True,
        'scale': True,
        'epsilon': 0.001,
        'updates_collections': None,
        'is_training': is_training,
    }

    self.sequence_new = tf.placeholder(
        tf.float32,
        shape=(1, spatial_fully_connected_size),
        name='_sequence_new_ph')
    self.sequence = tf.Variable(
        np.zeros((sequence_length, spatial_fully_connected_size)),
        dtype=tf.float32,
        trainable=False,
        name='_sequence_var')
    self.sequence_gradient = tf.Variable(
        np.zeros((sequence_length, spatial_fully_connected_size)),
        dtype=tf.float32,
        trainable=False,
        name='_sequence_grad')

    # Shift the window by one row: drop row 0, append the new features.
    self.add_sequence_new_op = self.sequence.assign(
        tf.concat([
            tf.slice(self.sequence, begin=[1, 0], size=[-1, -1]),
            self.sequence_new
        ], 0))
    # Shift the gradient buffer the same way, appending a zero row.
    self.add_sequence_gradient_new_op = self.sequence_gradient.assign(
        tf.concat([
            tf.slice(self.sequence_gradient, begin=[1, 0], size=[-1, -1]),
            tf.zeros_like(self.sequence_new)
        ], 0))

    net = tf.reshape(
        self.sequence, (-1, sequence_length * spatial_fully_connected_size))

    with tf.control_dependencies(
            [self.add_sequence_new_op, self.add_sequence_gradient_new_op]):
        with tf.contrib.framework.arg_scope(
                [layers.fully_connected],
                activation_fn=tf.nn.relu,
                normalizer_fn=layers.batch_norm,
                normalizer_params=bn_params,
                weights_initializer=layers.variance_scaling_initializer(),
                weights_regularizer=layers.l2_regularizer(weight_decay)):
            for layer_num, fully_connected_num in enumerate(
                    temporal_fully_connected_layers, start=1):
                net = layers.fully_connected(
                    net,
                    fully_connected_num,
                    scope='temporal_FC{}'.format(layer_num))

    self.logits = layers.fully_connected(
        net,
        2,
        activation_fn=None,
        weights_initializer=layers.xavier_initializer(),
        weights_regularizer=layers.l2_regularizer(weight_decay),
        biases_initializer=tf.zeros_initializer(),
        scope='logits')

    # Class-weighted cross entropy; 1e-7 guards against log(0).
    softmax = tf.nn.softmax(self.logits, name='_softmax')
    coefficients = tf.constant([0.001, 0.999])
    # `reduction_indices` is a deprecated alias of `axis`.
    cross_entropy = -tf.reduce_sum(
        tf.multiply(
            tf.one_hot(labels, depth=2) * tf.log(softmax + 1e-7),
            coefficients),
        axis=[1])
    xent_loss = tf.reduce_mean(cross_entropy, name='_temporal_loss')

    regularization_losses = tf.get_collection(
        tf.GraphKeys.REGULARIZATION_LOSSES)
    self.loss = tf.add_n([xent_loss] + regularization_losses)
    self.labels = labels

    if is_training:
        self.trainer = tf.train.AdamOptimizer(learning_rate)
        self.train_op = self.trainer.minimize(self.loss)
        # Gradient of the loss w.r.t. the stored window, accumulated into
        # the gradient buffer.
        self.sequence_gradient_new = tf.gradients(
            self.loss, [self.sequence])[0]
        self.add_sequence_gradient_new = tf.assign_add(
            self.sequence_gradient, self.sequence_gradient_new)
def build(inputs, labels, weights, is_training=True):
    """Build the VGG-style segmentation network and its loss.

    Five blocks of 3x3 convolutions are followed by a rate-4 convolution
    (``conv6_1``) and a 1x1 classifier (``unary_2``). Blocks 4 and 5 run
    between ``tf.space_to_batch`` / ``tf.batch_to_space`` instead of
    following a pooling layer. Logits are bilinearly resized to
    ``FLAGS.img_height`` x ``FLAGS.img_width``.

    Args:
        inputs: Image tensor fed to ``conv1_1``.
        labels: Forwarded to ``get_loss``.
        weights: Forwarded to ``get_loss``.
        is_training: Passed to batch norm and ``get_loss``; also selects
            the return signature.

    Returns:
        ``(logits, loss, init_op, init_feed)`` when ``is_training``,
        otherwise ``(logits, loss)``.
    """
    vgg_layers, vgg_layer_names = read_vgg_init(FLAGS.vgg_init_dir)

    weight_decay = 5e-4
    bn_params = {
        # Decay for the moving averages.
        'decay': 0.999,
        'center': True,
        'scale': True,
        # epsilon to prevent 0s in variance.
        'epsilon': 0.001,
        # None to force the updates
        'updates_collections': None,
        'is_training': is_training,
    }

    def conv_block(tensor, depth, block_id, count):
        """Apply `count` convs named conv<block_id>_1 .. conv<block_id>_<count>."""
        for idx in range(1, count + 1):
            tensor = layers.convolution2d(
                tensor, depth, scope='conv%d_%d' % (block_id, idx))
        return tensor

    start_time = time.time()

    # VGG trunk: no normalizer and no explicit weights initializer here.
    with tf.contrib.framework.arg_scope(
            [layers.convolution2d],
            kernel_size=3,
            stride=1,
            padding='SAME',
            rate=1,
            activation_fn=tf.nn.relu,
            normalizer_fn=None,
            weights_initializer=None,
            weights_regularizer=layers.l2_regularizer(weight_decay)):
        net = conv_block(inputs, 64, 1, 2)
        net = layers.max_pool2d(net, 2, 2, scope='pool1')
        net = conv_block(net, 128, 2, 2)
        net = layers.max_pool2d(net, 2, 2, scope='pool2')
        net = conv_block(net, 256, 3, 3)

        paddings = [[0, 0], [0, 0]]
        crops = [[0, 0], [0, 0]]

        block_size = 2
        net = tf.space_to_batch(net, paddings=paddings, block_size=block_size)
        net = conv_block(net, 512, 4, 3)
        net = tf.batch_to_space(net, crops=crops, block_size=block_size)

        block_size = 4
        net = tf.space_to_batch(net, paddings=paddings, block_size=block_size)
        net = conv_block(net, 512, 5, 3)
        net = tf.batch_to_space(net, crops=crops, block_size=block_size)

    # Head: batch-normalised convs using FLAGS.weight_decay.
    with tf.contrib.framework.arg_scope(
            [layers.convolution2d],
            stride=1,
            padding='SAME',
            weights_initializer=layers.variance_scaling_initializer(),
            activation_fn=tf.nn.relu,
            normalizer_fn=layers.batch_norm,
            normalizer_params=bn_params,
            weights_regularizer=layers.l2_regularizer(FLAGS.weight_decay)):
        net = layers.convolution2d(
            net, 512, kernel_size=3, scope='conv6_1', rate=4)
        logits = layers.convolution2d(
            net,
            FLAGS.num_classes,
            1,
            padding='SAME',
            activation_fn=None,
            scope='unary_2',
            rate=2)

    print('logits', logits.get_shape())
    logits = tf.image.resize_bilinear(
        logits, [FLAGS.img_height, FLAGS.img_width], name='resize_score')
    loss = get_loss(logits, labels, weights, is_training=is_training)
    print(("build model finished in: %ds" % (time.time() - start_time)))

    if not is_training:
        return logits, loss

    init_op, init_feed = create_init_op(vgg_layers)
    return logits, loss, init_op, init_feed
def build(self, sequence_length, spatial_fully_connected_size, inputs,
          learning_rate, weight_decay, vgg_init_dir, is_training):
    """Build the spatial (per-frame) VGG feature extractor.

    The input is cropped, passed through a VGG-style trunk with five
    pooling stages, flattened, and projected to
    ``spatial_fully_connected_size`` features. The loss is the product of
    that representation with the transposed ``self.final_gradient``
    placeholder.

    Args:
        sequence_length: Length of the ``self.handles`` list.
        spatial_fully_connected_size: Width of the ``spatial_FC`` layer and
            of the ``final_gradient`` placeholder.
        inputs: Image tensor; cropping is applied along axes 1 and 2.
        learning_rate: Learning rate of the Adam optimizer.
        weight_decay: Scale of the L2 weight regularizers.
        vgg_init_dir: Directory passed to ``read_vgg_init``.
        is_training: Passed to batch norm; when true, the optimizer and
            the VGG init ops are also created.

    Side effects:
        Sets ``self.final_gradient``, ``self.handles``,
        ``self.representation``, ``self.loss`` and
        ``self.partial_run_setup_objs``. When ``is_training`` it also sets
        ``self.trainer``, ``self.train_op``, ``self.with_train_op``,
        ``self.pretrained_vars`` and ``self.vgg_init``.
    """
    bn_params = {
        'decay': 0.999,
        'center': True,
        'scale': True,
        'epsilon': 0.001,
        'updates_collections': None,
        'is_training': is_training,
    }

    # Crop: skip the first third along axis 1 and keep the first two
    # thirds along axis 2.
    input_shape = inputs.get_shape()
    horizontal_slice_size = int(round(int(input_shape[2]) / 3))
    vertical_slice_size = int(round(int(input_shape[1]) / 3))
    inputs = tf.slice(
        inputs,
        begin=[0, vertical_slice_size, 0, 0],
        size=[-1, -1, horizontal_slice_size * 2, -1])

    self.final_gradient = tf.placeholder(
        tf.float32,
        shape=(1, spatial_fully_connected_size),
        name='_final_gradient_ph')
    self.handles = [None] * sequence_length

    def conv_block(tensor, depth, block_id, count):
        """Apply `count` convs named conv<block_id>_1 .. conv<block_id>_<count>."""
        for idx in range(1, count + 1):
            tensor = layers.convolution2d(
                tensor, depth, scope='conv%d_%d' % (block_id, idx))
        return tensor

    with tf.contrib.framework.arg_scope(
            [layers.convolution2d],
            kernel_size=3,
            stride=1,
            padding='SAME',
            rate=1,
            activation_fn=tf.nn.relu,
            normalizer_fn=None,
            weights_initializer=None,
            weights_regularizer=layers.l2_regularizer(weight_decay)):
        net = conv_block(inputs, 64, 1, 2)
        net = layers.max_pool2d(net, 2, 2, scope='pool1')
        net = conv_block(net, 128, 2, 2)
        net = layers.max_pool2d(net, 2, 2, scope='pool2')
        net = conv_block(net, 256, 3, 3)
        net = layers.max_pool2d(net, 2, 2, scope='pool3')
        net = conv_block(net, 512, 4, 3)
        net = layers.max_pool2d(net, 2, 2, scope='pool4')
        net = conv_block(net, 512, 5, 2)
        # Only the last trunk conv is batch-normalised.
        net = layers.convolution2d(
            net,
            512,
            scope='conv5_3',
            normalizer_fn=layers.batch_norm,
            normalizer_params=bn_params)
        net = layers.max_pool2d(net, 2, 2, scope='pool5')

    net = layers.flatten(net)
    with tf.contrib.framework.arg_scope(
            [layers.fully_connected],
            activation_fn=tf.nn.relu,
            normalizer_fn=layers.batch_norm,
            normalizer_params=bn_params,
            weights_initializer=layers.variance_scaling_initializer(),
            weights_regularizer=layers.l2_regularizer(weight_decay)):
        net = layers.fully_connected(
            net, spatial_fully_connected_size, scope='spatial_FC')

    self.representation = layers.flatten(net)
    self.loss = tf.matmul(
        self.representation,
        tf.transpose(self.final_gradient),
        name='_spatial_loss')
    self.partial_run_setup_objs = [self.representation, self.loss]

    if is_training:
        self.trainer = tf.train.AdamOptimizer(learning_rate)
        self.train_op = self.trainer.minimize(self.loss)
        # NOTE(review): no new op is created inside this context, so
        # `with_train_op` is the same tensor as `self.loss` and carries no
        # dependency on `train_op`. Left as is because callers fetch via
        # partial_run; confirm before wrapping in tf.identity.
        with tf.control_dependencies([self.train_op]):
            self.with_train_op = self.loss
        self.partial_run_setup_objs.append(self.train_op)

        vgg_layers, vgg_layer_names = read_vgg_init(vgg_init_dir)
        init_op, init_feed, pretrained_vars = create_init_op(vgg_layers)
        self.pretrained_vars = pretrained_vars
        self.vgg_init = (init_op, init_feed)
import tensorflow as tf
from tensorflow.contrib import layers

# Demo: attach L1 / L2 regularizers through variable scopes and inspect the
# resulting graph collections.

# L1 regularizer with coefficient 0.1.
regularizer1 = layers.l1_regularizer(0.1)
# L2 regularizer with coefficient 0.05.
regularizer2 = layers.l2_regularizer(0.05)

# First model parameter: var/weight, shape [8], initialised to ones, with the
# L1 regularizer attached through the scope. The explicit initializer on
# get_variable is the one that applies.
with tf.variable_scope('var',
                       initializer=tf.random_normal_initializer(),
                       regularizer=regularizer1):
    weight = tf.get_variable('weight', shape=[8],
                             initializer=tf.ones_initializer())

# Second model parameter: var2/weight, shape [8], initialised to ones, with
# the L2 regularizer attached through the scope.
with tf.variable_scope('var2',
                       initializer=tf.random_normal_initializer(),
                       regularizer=regularizer2):
    weight2 = tf.get_variable('weight', shape=[8],
                              initializer=tf.ones_initializer())

# Print the model variables held in the variables collection.
# GLOBAL_VARIABLES replaces the deprecated tf.GraphKeys.VARIABLES key.
print(tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES))

# Print the regularization terms held in the regularization collection.
print(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))

# Regularization loss: the sum of every term in the regularization
# collection.
regularization_loss = tf.reduce_sum(
    tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))

# Print the Tensor that represents the total regularization loss.
print(regularization_loss)
if avg_class == None: result = tf.matmul(full_5, layer6_full_w) + layer6_full_b else: result = tf.matmul(full_5, avg_class.average(layer6_full_w)) + avg_class.average(layer6_full_b) return result ''' 定义 x,y 以及反向传播的相关参数 ''' x = tf.placeholder(tf.float32, shape=[batch_size, 28, 28, 1], name='x_input') y_ = tf.placeholder(tf.float32, shape=[batch_size, 10], name='y-output') ''' 初始化正则函数 ''' regularizer = layers.l2_regularizer(0.001) ''' 对于使用滑动平均值 resuse设置为True 其他设置为false ''' y = hidden_layer(x, regularizer, avg_class=None, resuse=False) training_step = tf.Variable(0, trainable=False) variable_averages = tf.train.ExponentialMovingAverage(0.99, training_step) variable_averages_op = variable_averages.apply(tf.trainable_variables()) ''' 注意reuse为True在这里 ''' average_y = hidden_layer(x, regularizer, variable_averages, resuse=True) '''
def _build_decoder(self, z1, z2, reuse=False):
    """Build the decoder that maps the latent pair (z1, z2) to p(x).

    z1 and z2 are concatenated along axis 1 and passed through the fully
    connected layers listed in ``self._model_conf["hu_dec"]``. Without
    recurrent decoding, a ``dense_latent`` layer yields the mean and
    log-variance of x; otherwise the work is handed to
    ``self._build_rnn_decoder_and_recon_x``.

    Args:
        z1: First latent tensor.
        z2: Second latent tensor.
        reuse: Forwarded to the variable scope, the layers and batch norm.

    Returns:
        ``(px, x)``. In the non-recurrent branch ``px`` is
        ``[x_mu, x_logvar]`` and all three tensors are reshaped to
        ``[-1] + target_shape``.

    Raises:
        ValueError: In the non-recurrent branch when
            ``self._model_conf["x_conti"]`` is falsy.
    """
    # consider include ``target'' into args,
    # since it may be used during training
    weights_regularizer = l2_regularizer(self._train_conf["l2_weight"])

    if self._model_conf["if_bn"]:
        normalizer_fn = batch_norm
    else:
        normalizer_fn = None

    normalizer_params = None
    if self._model_conf["if_bn"]:
        normalizer_params = {
            "scope": "BatchNorm",
            "is_training": self._feed_dict["is_train"],
            "reuse": reuse
        }
        # TODO: need to upgrade to latest, which
        # commit support param_regularizers args

    hidden = tf.concat([z1, z2], axis=1)
    with tf.variable_scope("dec", reuse=reuse):
        for layer_idx, num_units in enumerate(self._model_conf["hu_dec"]):
            hidden = fully_connected(
                inputs=hidden,
                num_outputs=num_units,
                activation_fn=nn.relu,
                normalizer_fn=normalizer_fn,
                normalizer_params=normalizer_params,
                weights_regularizer=weights_regularizer,
                reuse=reuse,
                scope="dec_fc%s" % (layer_idx + 1))

        if self._model_conf["rec_dec"]:
            # Recurrent decoder; targets are only supplied while training.
            targets = None
            if self.training:
                targets = self._feed_dict["targets"]
            hidden, px, x = self._build_rnn_decoder_and_recon_x(
                hidden, targets, self.training, reuse)
        else:
            # No recurrent layers: use dense_latent for the target.
            target_shape = list(self._model_conf["target_shape"])
            target_dim = np.prod(target_shape)

            # Only continuous x is supported in this branch.
            if not self._model_conf["x_conti"]:
                raise ValueError

            x_mu, x_logvar, x = dense_latent(
                hidden,
                target_dim,
                mu_nl=self._model_conf["x_mu_nl"],
                logvar_nl=self._model_conf["x_logvar_nl"],
                reuse=reuse,
                scope="dec_lat")
            x_mu = tf.reshape(x_mu, [-1] + target_shape)
            x_logvar = tf.reshape(x_logvar, [-1] + target_shape)
            px = [x_mu, x_logvar]
            x = tf.reshape(x, [-1] + target_shape)

    return px, x
def weight_decay(self):
    """Return a new L2 regularizer with scale 1.0."""
    regularizer = layers.l2_regularizer(1.0)
    return regularizer
def _streaming_confusion_matrix(labels, predictions, num_classes):
    """Return a ``(value, update_op)`` pair accumulating a confusion matrix.

    ``tf.estimator.EstimatorSpec`` only accepts ``(metric_tensor, update_op)``
    tuples in ``eval_metric_ops``, so the per-batch matrix returned by
    ``tf.confusion_matrix`` is summed into a local (non-checkpointed)
    variable across evaluation batches.
    """
    batch_matrix = tf.confusion_matrix(labels=labels,
                                       predictions=predictions,
                                       num_classes=num_classes)
    total_matrix = tf.Variable(
        initial_value=tf.zeros(shape=[num_classes, num_classes],
                               dtype=batch_matrix.dtype),
        trainable=False,
        collections=[tf.GraphKeys.LOCAL_VARIABLES],
        name="total_confusion_matrix")
    update_op = tf.assign_add(total_matrix, batch_matrix)
    return tf.identity(total_matrix), update_op


def model_fn(features, labels, mode, params):
    """Model function of Deep & Cross network(DCN) for predictive analytics of high dimensional sparse data.

    Args:
        features: dict of input tensors; read through the keys
            ``params["name_feat_vals_numerical"]`` and
            ``params["name_feat_inds_categorical"]``.
        labels: label tensor; unused in ``PREDICT`` mode.
        mode: a ``tf.estimator.ModeKeys`` value.
        params: dict of hyperparameters, described below.

    Args of dict params:
        task: str
            A string, representing the type of task.
            Note:
                it must take value from ["binary", "multi", "regression"];
                it selects the type of loss function:
                    "binary": sigmoid cross-entropy;
                    "multi": softmax cross-entropy;
                    "regression": mean squared error.
        output_size: int
            An integer scalar, representing the number of output units.
            Note:
                it must correspond to <task>:
                    task == "binary": output_size must be equal to 1;
                    task == "multi": output_size must be equal to the dimension of class distribution;
                    task == "regression": output_size must be equal to 1.
        field_size_numerical: int
            An integer scalar, representing the number of numerical fields(also is the number of numerical
            features) of dataset.
            Note:
                it must be consistent with <field_size_numerical> of input function.
        field_size_categorical: int
            An integer scalar, representing the number of categorical fields(number of categorical columns
            before one-hot encoding) of dataset.
            Note:
                it must be consistent with <field_size_categorical> of input function.
        feat_size_categorical: int
            An integer scalar, representing the number of categorical features(number of categorical columns
            after one-hot encoding) of dataset.
        embed_size: int
            An integer scalar, representing the dimension of embedding vectors for all categorical features.
        num_cross_hidden_layers: int
            An integer scalar, representing the number of hidden layers belonging to the cross part.
        deep_hidden_sizes: list
            A list, containing the number of hidden units of each hidden layer in the deep part.
            Note:
                it doesn't contain the output layer of the dnn part.
        dropouts: list or None
            If list, containing the dropout rate of each hidden layer in the dnn part;
            If None, don't use dropout for any hidden layer.
            Note:
                if list, the length of <dropouts> must be equal to <hidden_sizes>.
        use_global_bias: bool
            A boolean, instructing whether to use a global bias in the output part of model inference.
        use_deep_hidden_bias: bool
            A boolean, instructing whether to use bias of hidden layer units in the deep part of model inference.
        use_bn: bool
            A boolean, instructing whether to use batch normalization for each hidden layer in the deep part.
        lamb: float
            A float scalar, representing the coefficient of the regularization term
            (the larger the value of lamb, the stronger the penalty is).
        optimizer: str
            A string, representing the type of optimizer.
        learning_rate: float
            A float scalar, representing the learning rate of optimizer.
        dtype: tf.Dtype
            A tf.DType, representing the numeric type of values.
            Note:
                it must take value from [tf.float32, tf.float64];
                it must be consistent with <dtype> of input function.
        name_feat_vals_numerical: str, optional
            A string, representing the name of numerical feature values in return dict.
            Note:
                it must be consistent with <name_feat_vals_numerical> of input function.
        name_feat_inds_categorical: str, optional
            A string, representing the name of categorical feature indices in return dict.
            Note:
                it must be consistent with <name_feat_inds_categorical> of input function.
        reuse: bool
            A boolean, which takes value from [False, True, tf.AUTO_REUSE].
        seed: int or None
            If integer scalar, representing the random seed of tensorflow;
            If None, random choice.

    Returns:
        A ``tf.estimator.EstimatorSpec`` for ``PREDICT``, ``EVAL`` and
        ``TRAIN`` modes; ``None`` for any other mode.

    Raises:
        ValueError: if <task> is not one of the supported values.
        NotImplementedError: if <optimizer> is not one of the supported values.
        AssertionError: if <output_size> is not 1 for a "binary" or
            "regression" task.
    """
    # ----------Declare all hyperparameters from params----------
    task = params["task"]
    output_size = params["output_size"]
    field_size_numerical = params["field_size_numerical"]
    field_size_categorical = params["field_size_categorical"]
    feat_size_categorical = params["feat_size_categorical"]
    embed_size = params["embed_size"]
    num_cross_hidden_layers = params["num_cross_hidden_layers"]
    deep_hidden_sizes = params["deep_hidden_sizes"]
    dropouts = params["dropouts"]
    use_global_bias = params["use_global_bias"]
    use_deep_hidden_bias = params["use_deep_hidden_bias"]
    use_bn = params["use_bn"]
    lamb = params["lamb"]
    optimizer = params["optimizer"]
    learning_rate = params["learning_rate"]
    dtype = params["dtype"]
    name_feat_vals_numerical = params["name_feat_vals_numerical"]
    name_feat_inds_categorical = params["name_feat_inds_categorical"]
    reuse = params["reuse"]
    seed = params["seed"]
    # -----Hyperparameters for threshold for binary classification task
    threshold = 0.5
    # -----Hyperparameters for exponential decay(*manual optional*)-----
    decay_steps = 5000
    decay_rate = 0.998
    staircase = True
    # -----Hyperparameters for information showing-----
    name_probability_output = "prob"
    name_classification_output = "class"
    name_regression_output = "pred"
    value_error_warning_task = (
        "Argument of model function <task>: \"{}\" is not supported. It must be in "
        "[\"binary\", \"multi\", \"regression\"]").format(task)
    value_error_warning_optimizer = "Argument value of <optimizer>: {} is not supported.".format(
        optimizer)
    value_error_warning_output_size_and_task = (
        "Argument of model function <output_size>: {}, must be 1 when <task> "
        "is: \"{}\"").format(output_size, task)

    # ----------Assert for hyperparameters----------
    if task in ("binary", "regression"):
        assert (output_size == 1), value_error_warning_output_size_and_task

    if seed is not None:
        tf.set_random_seed(seed=seed)

    is_training = (mode == tf.estimator.ModeKeys.TRAIN)

    with tf.variable_scope(name_or_scope="inference", reuse=reuse):
        with tf.name_scope(name="inputs"):
            valsn = features[name_feat_vals_numerical]
            indsc = features[
                name_feat_inds_categorical]  # A tensor in shape of (None, field_size_categorical)
            batch = tf.shape(input=valsn)[0]
            dim = field_size_numerical + field_size_categorical * embed_size

        with tf.name_scope(name="embed-and-stack-layer"):
            V = tf.get_variable(name="V",
                                shape=[feat_size_categorical, embed_size],
                                dtype=dtype,
                                initializer=xavier_initializer(uniform=False,
                                                               seed=seed,
                                                               dtype=dtype),
                                regularizer=None)
            embed = tf.nn.embedding_lookup(
                params=V, ids=indsc
            )  # A tensor in shape of (None, field_size_categorical, embed_size)
            embed = tf.reshape(tensor=embed,
                               shape=[-1, field_size_categorical * embed_size])
            x = tf.reshape(tensor=tf.concat(values=[valsn, embed], axis=-1),
                           shape=[batch, dim])

        with tf.name_scope(name="deep-part"):
            ydeep = x
            for l, hidden_size in enumerate(deep_hidden_sizes):
                # -----The order for each hidden layer is: matmul => bn => relu => dropout => matmul => ...
                ydeep = tf.layers.dense(
                    inputs=ydeep,
                    units=hidden_size,
                    activation=None,
                    use_bias=use_deep_hidden_bias,
                    kernel_initializer=xavier_initializer(uniform=True,
                                                          seed=seed,
                                                          dtype=dtype),
                    bias_initializer=tf.zeros_initializer(dtype=dtype),
                    kernel_regularizer=l2_regularizer(scale=lamb),
                    bias_regularizer=None,
                    name="deep-dense-hidden-{}".format(l))
                if use_bn:
                    ydeep = tf.layers.batch_normalization(
                        inputs=ydeep,
                        axis=-1,
                        momentum=0.99,  # *manual optional*
                        epsilon=1e-3,  # *manual optional*
                        center=True,
                        scale=True,
                        beta_initializer=tf.zeros_initializer(dtype=dtype),
                        gamma_initializer=tf.ones_initializer(dtype=dtype),
                        moving_mean_initializer=tf.zeros_initializer(
                            dtype=dtype),
                        moving_variance_initializer=tf.ones_initializer(
                            dtype=dtype),
                        beta_regularizer=None,  # *manual optional*
                        gamma_regularizer=None,  # *manual optional*
                        training=is_training,
                        name="dense-bn-hidden-{}".format(l))
                ydeep = tf.nn.relu(features=ydeep)
                if dropouts is not None:
                    ydeep = tf.layers.dropout(inputs=ydeep,
                                              rate=dropouts[l],
                                              seed=seed,
                                              training=is_training)

        with tf.name_scope(name="cross-part"):
            Wc = tf.get_variable(name="Wc",
                                 shape=[num_cross_hidden_layers, dim],
                                 dtype=dtype,
                                 initializer=xavier_initializer(uniform=False,
                                                                seed=seed,
                                                                dtype=dtype),
                                 regularizer=l2_regularizer(scale=lamb))
            bc = tf.get_variable(name="bc",
                                 shape=[num_cross_hidden_layers, dim],
                                 dtype=dtype,
                                 initializer=xavier_initializer(uniform=False,
                                                                seed=seed,
                                                                dtype=dtype),
                                 regularizer=l2_regularizer(scale=lamb))
            ycross = x  # A tensor in shape of (batch, dim)
            for l in range(num_cross_hidden_layers):
                wl = tf.expand_dims(input=Wc[l],
                                    axis=1)  # A tensor in shape of (dim, 1)
                bl = bc[l]  # A tensor in shape of (dim)
                xwl = tf.matmul(a=ycross,
                                b=wl)  # A tensor in shape of (batch, 1)
                ycross = tf.multiply(x=x, y=xwl) + ycross + bl

        with tf.name_scope(name="combine-output"):
            y = tf.concat(values=[ycross, ydeep],
                          axis=-1)  # A tensor in shape of (batch, concat_size)
            logits = tf.layers.dense(
                inputs=y,
                units=output_size,
                activation=None,
                use_bias=use_global_bias,
                kernel_initializer=xavier_initializer(uniform=True,
                                                      seed=seed,
                                                      dtype=dtype),
                bias_initializer=tf.zeros_initializer(dtype=dtype),
                kernel_regularizer=l2_regularizer(scale=lamb),
                bias_regularizer=None,
                name="output")  # A tensor in shape of (batch, output_size)
            if task == "binary":
                logits = tf.squeeze(input=logits,
                                    axis=1)  # A tensor in shape of (None)
                probs = tf.nn.sigmoid(x=logits)
                classes = tf.cast(x=tf.greater(x=probs, y=threshold),
                                  dtype=tf.int32)
                predictions = {
                    name_probability_output: probs,
                    name_classification_output: classes
                }
            elif task == "multi":
                probs_dist = tf.nn.softmax(logits=logits, axis=-1)
                classes = tf.argmax(input=logits,
                                    axis=-1,
                                    output_type=tf.int32)
                predictions = {
                    name_probability_output: probs_dist,
                    name_classification_output: classes
                }
            elif task == "regression":
                logits = tf.squeeze(input=logits,
                                    axis=1)  # A tensor in shape of (None)
                predictions = {name_regression_output: logits}
            else:
                raise ValueError(value_error_warning_task)

    # ----------Provide an estimator spec for `ModeKeys.PREDICTION` mode----------
    if mode == tf.estimator.ModeKeys.PREDICT:
        export_outputs = {
            tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY:
            tf.estimator.export.PredictOutput(outputs=predictions)
        }  # For usage of tensorflow serving
        return tf.estimator.EstimatorSpec(mode=mode,
                                          predictions=predictions,
                                          export_outputs=export_outputs)

    # ----------Build loss function----------
    if task == "binary":
        loss = tf.reduce_mean(
            input_tensor=tf.nn.sigmoid_cross_entropy_with_logits(
                labels=labels, logits=logits),
            axis=0,
            keepdims=False
        )  # A scalar, representing the training loss of current batch training dataset
    elif task == "regression":
        loss = tf.reduce_mean(
            input_tensor=tf.square(x=tf.subtract(x=labels, y=logits)),
            axis=0,
            keepdims=False
        )  # A scalar, representing the training loss of current batch training dataset
    elif task == "multi":
        labels_one_hot = tf.one_hot(
            indices=tf.cast(x=labels, dtype=tf.int32),
            depth=output_size,
            axis=-1,
            dtype=dtype)  # A tensor in shape of (None, output_size)
        loss = tf.reduce_mean(
            input_tensor=tf.nn.softmax_cross_entropy_with_logits_v2(
                labels=labels_one_hot, logits=logits),
            axis=0,
            keepdims=False
        )  # A scalar, representing the training loss of current batch training dataset
    else:
        raise ValueError(value_error_warning_task)
    reg = tf.reduce_sum(
        input_tensor=tf.get_collection(key=tf.GraphKeys.REGULARIZATION_LOSSES),
        axis=0,
        keepdims=False,
        name="regularization"
    )  # A scalar, representing the regularization loss of current batch training dataset
    loss += reg

    # ----------Provide an estimator spec for `ModeKeys.EVAL` mode----------
    if mode == tf.estimator.ModeKeys.EVAL:
        if task == "binary":
            eval_metric_ops = {
                "accuracy":
                tf.metrics.accuracy(
                    labels=labels,
                    predictions=predictions[name_classification_output]),
                "precision":
                tf.metrics.precision(
                    labels=labels,
                    predictions=predictions[name_classification_output]),
                "recall":
                tf.metrics.recall(
                    labels=labels,
                    predictions=predictions[name_classification_output]),
                # AUC sweeps thresholds, so it needs the probabilities rather
                # than the already-thresholded class ids.
                "auc":
                tf.metrics.auc(
                    labels=labels,
                    predictions=predictions[name_probability_output])
            }
        elif task == "multi":
            eval_metric_ops = {
                # EstimatorSpec requires (value, update_op) tuples, so the
                # confusion matrix is accumulated across batches.
                "confusion-matrix":
                _streaming_confusion_matrix(
                    labels=labels,
                    predictions=predictions[name_classification_output],
                    num_classes=output_size)
            }
        elif task == "regression":
            eval_metric_ops = {
                "rmse":
                tf.metrics.root_mean_squared_error(
                    labels=labels,
                    predictions=predictions[name_regression_output])
            }
        else:
            raise ValueError(value_error_warning_task)
        return tf.estimator.EstimatorSpec(mode=mode,
                                          loss=loss,
                                          predictions=predictions,
                                          eval_metric_ops=eval_metric_ops)

    # ----------Build optimizer----------
    global_step = tf.train.get_or_create_global_step(
        graph=tf.get_default_graph(
        ))  # Define a global step for training step counter

    def exp_decayed_learning_rate():
        # Shared schedule of every "*-exp-decay" optimizer variant.
        return tf.train.exponential_decay(learning_rate=learning_rate,
                                          global_step=global_step,
                                          decay_steps=decay_steps,
                                          decay_rate=decay_rate,
                                          staircase=staircase)

    if optimizer == "sgd":
        opt_op = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
    elif optimizer == "sgd-exp-decay":
        opt_op = tf.train.GradientDescentOptimizer(
            learning_rate=exp_decayed_learning_rate())
    elif optimizer == "momentum":
        opt_op = tf.train.MomentumOptimizer(learning_rate=learning_rate,
                                            momentum=0.9,
                                            use_nesterov=False)
    elif optimizer == "momentum-exp-decay":
        opt_op = tf.train.MomentumOptimizer(
            learning_rate=exp_decayed_learning_rate(),
            momentum=0.9,
            use_nesterov=False)
    elif optimizer == "nesterov":
        opt_op = tf.train.MomentumOptimizer(learning_rate=learning_rate,
                                            momentum=0.9,
                                            use_nesterov=True)
    elif optimizer == "nesterov-exp-decay":
        opt_op = tf.train.MomentumOptimizer(
            learning_rate=exp_decayed_learning_rate(),
            momentum=0.9,
            use_nesterov=True)
    elif optimizer == "adagrad":
        opt_op = tf.train.AdagradOptimizer(learning_rate=learning_rate,
                                           initial_accumulator_value=0.1)
    elif optimizer == "adadelta":
        opt_op = tf.train.AdadeltaOptimizer(learning_rate=learning_rate,
                                            rho=0.95)
    elif optimizer == "rmsprop":
        opt_op = tf.train.RMSPropOptimizer(learning_rate=learning_rate,
                                           decay=0.9)
    elif optimizer == "adam":
        opt_op = tf.train.AdamOptimizer(learning_rate=learning_rate,
                                        beta1=0.9,
                                        beta2=0.999)
    else:
        raise NotImplementedError(value_error_warning_optimizer)

    # tf.layers.batch_normalization registers its moving mean/variance
    # updates in UPDATE_OPS; they only run if the train op depends on them.
    update_ops = tf.get_collection(key=tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(control_inputs=update_ops):
        train_op = opt_op.minimize(loss=loss,
                                   global_step=global_step,
                                   name="train_op")

    # ----------Provide an estimator spec for `ModeKeys.TRAIN` mode----------
    if mode == tf.estimator.ModeKeys.TRAIN:
        return tf.estimator.EstimatorSpec(mode=mode,
                                          loss=loss,
                                          train_op=train_op)
def _build_graph(self):
    """Build the complete training graph and return it.

    Creates the input placeholders, the word/entity (and optional context)
    embedding variables, the attention over the click history computed from
    ``self._kcnn`` title embeddings, the loss and the Adam train op.  The
    resulting tensors and ops are stored on ``self`` (``clicked_words``,
    ``news_words``, ``labels``, ``logits``, ``base_loss``, ``l2_loss``,
    ``loss``, ``optimizer``, ``init`` ...).

    Returns:
        The ``tf.Graph`` holding everything built here.
    """
    graph = tf.Graph()
    tf.reset_default_graph()
    with graph.as_default():
        with tf.name_scope("input"):
            self.clicked_words = tf.placeholder(
                dtype=tf.int32,
                shape=[None, self.max_click_history, self.max_title_length],
                name="clicked_words")
            self.clicked_entities = tf.placeholder(
                dtype=tf.int32,
                shape=[None, self.max_click_history, self.max_title_length],
                name="clicked_entities")
            self.news_words = tf.placeholder(
                dtype=tf.int32,
                shape=[None, self.max_title_length],
                name="news_words")
            self.news_entities = tf.placeholder(
                dtype=tf.int32,
                shape=[None, self.max_title_length],
                name="news_entities")
            self.labels = tf.placeholder(dtype=tf.float32,
                                         shape=[None, ],
                                         name="labels")

        with tf.name_scope("embedding"):
            self.word_embeddings = tf.Variable(self.word_embs,
                                               dtype=tf.float32,
                                               name="word")
            self.entity_embeddings = tf.Variable(self.ent_embs,
                                                 dtype=tf.float32,
                                                 name="entity")
            # self.params collects the raw embedding variables that receive
            # the explicit L2 penalty in the "train" section below.
            self.params.append(self.word_embeddings)
            self.params.append(self.entity_embeddings)
            if self.use_context:
                self.context_embeddings = tf.Variable(self.context_embs,
                                                      dtype=tf.float32,
                                                      name="context")
                self.params.append(self.context_embeddings)

            if self.transform:
                self.entity_embeddings = tf.layers.dense(
                    inputs=self.entity_embeddings,
                    units=self.entity_dim,
                    activation=tf.nn.tanh,
                    name="transformed_entity",
                    kernel_regularizer=l2_regularizer(self.l2_weight))
                if self.use_context:
                    self.context_embeddings = tf.layers.dense(
                        inputs=self.context_embeddings,
                        units=self.entity_dim,
                        activation=tf.nn.tanh,
                        name="transformed_context",
                        kernel_regularizer=l2_regularizer(self.l2_weight))

        with tf.name_scope("attention"):
            # [batch_size * max_click_history, max_title_length]
            clicked_words = tf.reshape(self.clicked_words,
                                       shape=[-1, self.max_title_length])
            clicked_entities = tf.reshape(self.clicked_entities,
                                          shape=[-1, self.max_title_length])

            # AUTO_REUSE lets both _kcnn calls share one set of weights.
            with tf.variable_scope("kcnn", reuse=tf.AUTO_REUSE):
                # title_embedding_length = out_channels * n_filters
                # [batch_size * max_click_history, title_embedding_length]
                clicked_embeddings = self._kcnn(clicked_words,
                                                clicked_entities)
                # [batch_size, title_embedding_length]
                news_embeddings = self._kcnn(self.news_words,
                                             self.news_entities)

            # [batch_size, max_click_history, title_embedding_length]
            clicked_embeddings = tf.reshape(
                clicked_embeddings,
                shape=[
                    -1, self.max_click_history,
                    self.out_channels * len(self.filter_sizes)
                ])
            # [batch_size, 1, title_embedding_length]
            news_embeddings_expanded = tf.expand_dims(news_embeddings, axis=1)
            # [batch_size, max_click_history]
            attention_weights = tf.reduce_sum(clicked_embeddings *
                                              news_embeddings_expanded,
                                              axis=-1)
            # [batch_size, max_click_history]
            attention_weights = tf.nn.softmax(attention_weights, dim=-1)
            # [batch_size, max_click_history, 1]
            attention_weights_expanded = tf.expand_dims(attention_weights,
                                                        axis=-1)
            # [batch_size, title_embedding_length]
            user_embeddings = tf.reduce_sum(clicked_embeddings *
                                            attention_weights_expanded,
                                            axis=1)

        with tf.name_scope("train"):
            # [batch_size, ]
            self.logits = tf.reduce_sum(user_embeddings * news_embeddings,
                                        axis=1)
            self.base_loss = tf.reduce_mean(
                tf.nn.sigmoid_cross_entropy_with_logits(labels=self.labels,
                                                        logits=self.logits))
            # Seed the accumulator with a constant.  A (trainable by default)
            # tf.Variable here would itself be part of the loss, letting the
            # optimizer lower the loss just by driving it negative.
            self.l2_loss = tf.constant(0., dtype=tf.float32)
            for param in self.params:
                self.l2_loss += self.l2_weight * tf.nn.l2_loss(param)
            if self.transform:
                # Kernel penalties registered by the dense transform layers.
                self.l2_loss += tf.losses.get_regularization_loss()
            self.loss = self.base_loss + self.l2_loss
            self.optimizer = tf.train.AdamOptimizer(self.lr).minimize(
                self.loss)

        self.init = tf.global_variables_initializer()
    return graph
def __call__(self, x, is_training=True):
    """Run the residual encoder / transposed-convolution decoder on ``x``.

    Every layer is created inside the variable scope ``self.name`` with
    ReLU activation, batch normalisation, no bias, 'SAME' padding and an
    L2 weight regulariser of 0.0002.  The final layer uses a sigmoid
    instead of ReLU.

    Args:
        x: input image batch (s x s x 3 per the original comments).
        is_training: forwarded to every batch-norm layer.

    Returns:
        The 3-channel sigmoid output of the last transposed convolution.
    """
    size = 16
    # (channel multiplier, stride) of each residual block, in order.  With a
    # 256 x 256 input the original comments give 256 -> 8 spatially and
    # 16 -> 512 channels.
    encoder_spec = [
        (2, 2), (2, 1),
        (4, 2), (4, 1),
        (8, 2), (8, 1),
        (16, 2), (16, 1),
        (32, 2), (32, 1),
    ]
    # (channel multiplier, stride) of each transposed convolution that
    # brings the 8 x 8 x 512 code back to 256 x 256 x 16.
    decoder_spec = [
        (32, 1),
        (16, 2), (16, 1), (16, 1),
        (8, 2), (8, 1), (8, 1),
        (4, 2), (4, 1), (4, 1),
        (2, 2), (2, 1),
        (1, 2), (1, 1),
    ]

    with tf.variable_scope(self.name):
        with arg_scope([tcl.batch_norm], is_training=is_training, scale=True):
            with arg_scope([tcl.conv2d, tcl.conv2d_transpose],
                           activation_fn=tf.nn.relu,
                           normalizer_fn=tcl.batch_norm,
                           biases_initializer=None,
                           padding='SAME',
                           weights_regularizer=tcl.l2_regularizer(0.0002)):
                # x: s x s x 3  ->  256 x 256 x 16
                se = tcl.conv2d(x, num_outputs=size, kernel_size=4, stride=1)
                for multiplier, stride in encoder_spec:
                    se = resBlock(se,
                                  num_outputs=size * multiplier,
                                  kernel_size=4,
                                  stride=stride)

                pd = se
                for multiplier, stride in decoder_spec:
                    pd = tcl.conv2d_transpose(pd,
                                              size * multiplier,
                                              4,
                                              stride=stride)

                # Two 3-channel ReLU layers, then the sigmoid output layer
                # (all 256 x 256 x 3).
                for _ in range(2):
                    pd = tcl.conv2d_transpose(pd, 3, 4, stride=1)
                pos = tcl.conv2d_transpose(pd,
                                           3,
                                           4,
                                           stride=1,
                                           activation_fn=tf.nn.sigmoid)
                #, padding='SAME', weights_initializer=tf.random_normal_initializer(0, 0.02))
                return pos
def _myForwardPass(self):
    """Build the forward pass and return the single-unit output.

    The predicted vertices of all ``self.seq_len`` steps are drawn onto a
    28 x 28 canvas, which is concatenated on axis 3 with the CNN features,
    the predicted mask images and the last slice (index -1 on axis 1) of
    both cell-state placeholders.  Two 3x3 convolutions (stored as
    ``self._conv1`` / ``self._conv2``) and a fully connected layer with one
    output follow.

    Returns:
        Output tensor of the ``"FC"`` fully connected layer.
    """
    placeholders = self._ph
    cnn_feats = placeholders.cnn_feats
    pred_polys = placeholders.pred_polys
    pred_mask_imgs = placeholders.pred_mask_imgs
    last_cell_state_1 = placeholders.cells_1[:, -1, :, :, :]
    last_cell_state_2 = placeholders.cells_2[:, -1, :, :, :]
    weight_decay = 0.00001
    batch_size = self.batch_size

    # Drawing the canvas
    canvas = tf.zeros(shape=(batch_size, 28, 28, 1))
    for step in range(self.seq_len):
        vertices_t = pred_polys[:, step]  # batch x
        # Prefix every vertex with its batch index to form scatter indices.
        batch_column = tf.reshape(tf.range(0, batch_size), (batch_size, 1))
        vertex_columns = tf.cast(vertices_t, tf.int32)
        scatter_indices = tf.concat([batch_column, vertex_columns], axis=1)
        ones = tf.ones(shape=batch_size)
        step_canvas = tf.scatter_nd(scatter_indices,
                                    ones,
                                    shape=(batch_size, 28, 28))
        canvas = canvas + tf.expand_dims(step_canvas, axis=-1)

    xt = tf.concat([
        cnn_feats, canvas, pred_mask_imgs, last_cell_state_1,
        last_cell_state_2
    ],
                   axis=3)

    conv_defaults = dict(
        kernel_size=[3, 3],
        stride=1,
        weights_regularizer=slim.l2_regularizer(weight_decay),
        activation_fn=tf.nn.relu,
        normalizer_fn=slim.batch_norm,
        normalizer_params={
            "is_training": self.is_training,
            "decay": 0.99,
            "center": True,
            "scale": True
        },
        weights_initializer=layers.variance_scaling_initializer(
            factor=2.0, mode='FAN_IN', uniform=False))
    with slim.arg_scope([slim.conv2d], **conv_defaults):
        self._conv1 = slim.conv2d(xt, scope="conv1", num_outputs=16)
        self._conv2 = slim.conv2d(self._conv1, scope="conv2", num_outputs=1)

    flattened = slim.flatten(self._conv2)
    return layers.fully_connected(
        flattened,
        1,
        weights_regularizer=layers.l2_regularizer(1e-5),
        scope="FC")
def __init__(self, max_len_left, max_len_right, vocab_size, embedding_size, num_hidden, d_1, d_l, k_1, k_2,
             num_layers, d_c, num_attentions, d_o, num_iter, mu=1e-2, l2_reg_lambda=0.0):
    """Build the sentence-pair classification graph.

    Both inputs share one embedding matrix and go through the same stack:
    two densely connected branches (kernel sizes ``k_1`` and ``k_2``), a
    compression layer, ``num_attentions`` dynamic self-attention heads
    refined for ``num_iter`` iterations, and a two-layer MLP with a
    shortcut connection that produces 2-class scores.

    Args:
        max_len_left: padded length of the left input.
        max_len_right: padded length of the right input.
        vocab_size: number of rows of the embedding matrix.
        embedding_size: number of columns of the embedding matrix.
        num_hidden: size argument passed to both MLP ``fc_layer`` calls.
        d_1: size argument of the first ``dense_layer``.
        d_l: size argument of every further ``dense_layer`` in a branch.
        k_1: kernel-size argument of the first branch.
        k_2: kernel-size argument of the second branch.
        num_layers: branches create layers numbered 2..num_layers.
        d_c: size argument of the compression ``dense_layer``.
        num_attentions: number of attention heads.
        d_o: size argument of each attention head's ``fc_layer``.
        num_iter: number of attention refinement iterations.
        mu: weight of the attention penalty term in the loss.
        l2_reg_lambda: scale of the L2 regularizer.
    """
    regularizer = layers.l2_regularizer(l2_reg_lambda)

    def unit_norm(tensor):
        # Divide every vector along the last axis by its L2 norm.
        return tensor / tf.sqrt(tf.reduce_sum(tf.square(tensor), axis=-1, keepdims=True))

    # placeholder for input data
    self.input_left = tf.placeholder(tf.int32, shape=[None, max_len_left], name="input_left")
    self.input_right = tf.placeholder(tf.int32, shape=[None, max_len_right], name="input_right")
    self.input_y = tf.placeholder(tf.float32, shape=[None, 2], name="input_y")
    self.dropout_keep_prob = tf.placeholder(tf.float32, name="dropout_keep_prob")
    keep_prob = self.dropout_keep_prob

    with tf.name_scope("embedding"):
        self.embedding_weight = tf.get_variable("embedding_weight",
                                                shape=[vocab_size, embedding_size],
                                                dtype=tf.float32,
                                                initializer=tf.truncated_normal_initializer())
        self.emb_left = tf.nn.embedding_lookup(self.embedding_weight, self.input_left, name="emb_left")
        self.emb_right = tf.nn.embedding_lookup(self.embedding_weight, self.input_right, name="emb_right")

    self.length_left = self.get_length(self.input_left)
    self.length_right = self.get_length(self.input_right)

    with tf.name_scope('dense_layers'):
        first_left = dense_layer(self.emb_left, 1, d_1, keep_prob, 'dense_layer_1', regularizer)
        first_right = dense_layer(self.emb_right, 1, d_1, keep_prob, 'dense_layer_1', regularizer)

        # One growing list of layer outputs per branch; each new layer sees
        # the concatenation of all earlier outputs of its branch.
        branches_left = [[first_left], [first_left]]
        branches_right = [[first_right], [first_right]]
        for k, kernel_size in enumerate([k_1, k_2]):
            for l in range(2, num_layers + 1):
                scope_name = 'dense_layer_{}_{}'.format(k, l)
                stacked_left = tf.concat(branches_left[k], axis=-1)
                stacked_right = tf.concat(branches_right[k], axis=-1)
                next_left = dense_layer(stacked_left, kernel_size, d_l, keep_prob, scope_name, regularizer)
                next_right = dense_layer(stacked_right, kernel_size, d_l, keep_prob, scope_name, regularizer)
                branches_left[k].append(next_left)
                branches_right[k].append(next_right)

        all_left = [self.emb_left] + branches_left[0] + branches_left[1]
        all_right = [self.emb_right] + branches_right[0] + branches_right[1]
        self.X_c_left = dense_layer(tf.concat(all_left, -1), 1, d_c, keep_prob, 'dense_layer_c', regularizer)
        self.X_c_right = dense_layer(tf.concat(all_right, -1), 1, d_c, keep_prob, 'dense_layer_c', regularizer)

    with tf.name_scope('dynamic_self_attention'):
        Z_left, Z_right, W_j = [], [], []
        for j in range(num_attentions):
            head_scope = 'dsa_{}'.format(j)
            X_hat_left, W = fc_layer(self.X_c_left, d_o, 1.0, head_scope, regularizer)
            X_hat_right, _ = fc_layer(self.X_c_right, d_o, 1.0, head_scope, regularizer)
            q_left = tf.zeros(shape=[tf.shape(X_hat_left)[0], 1, max_len_left], dtype=tf.float32)
            q_right = tf.zeros(shape=[tf.shape(X_hat_right)[0], 1, max_len_right], dtype=tf.float32)

            last_iter = num_iter - 1
            for r in range(num_iter):
                a_left = get_masked_weights(q_left, self.length_left, max_len_left)
                z_left = tf.nn.tanh(tf.matmul(a_left, X_hat_left))  # [batch_size, 1, d_o]
                a_right = get_masked_weights(q_right, self.length_right, max_len_right)
                z_right = tf.nn.tanh(tf.matmul(a_right, X_hat_right))  # [batch_size, 1, d_o]

                if r == last_iter:
                    Z_left.append(tf.reshape(z_left, shape=[-1, d_o]))
                    Z_right.append(tf.reshape(z_right, shape=[-1, d_o]))
                    # for visualize
                    tf.identity(a_left, name='attention_left')
                    tf.identity(a_right, name='attention_right')

                # Add the product of the length-normalised summary and
                # length-normalised token vectors to the attention logits.
                X_left_unit = unit_norm(X_hat_left)
                z_left_unit = unit_norm(z_left)
                X_right_unit = unit_norm(X_hat_right)
                z_right_unit = unit_norm(z_right)
                q_left = q_left + tf.matmul(z_left_unit, tf.transpose(X_left_unit, [0, 2, 1]))
                q_right = q_right + tf.matmul(z_right_unit, tf.transpose(X_right_unit, [0, 2, 1]))
            W_j.append(W)

    with tf.name_scope('penalization'):
        # relu(1 - ||W_i - W_j||_F^2) summed over every pair of heads.
        penalty = 0.0
        for i, w_first in enumerate(W_j):
            for w_second in W_j[i + 1:]:
                penalty += tf.nn.relu(1 - tf.square(tf.norm(w_first - w_second, ord='fro', axis=[0, 1])))
        self.penalty = penalty

    with tf.name_scope('mlp_layer'):
        self.V_left = tf.concat(Z_left, axis=-1)
        self.V_right = tf.concat(Z_right, axis=-1)
        pair_features = [
            self.V_left,
            self.V_right,
            tf.abs(self.V_left - self.V_right),
            tf.multiply(self.V_left, self.V_right),
        ]
        self.V = tf.concat(pair_features, axis=-1)
        output, _ = fc_layer(self.V, num_hidden, keep_prob, 'fc_1', regularizer=regularizer)
        # has a shortcut connection
        shortcut = tf.concat([self.V, output], axis=-1)
        self.full_out, _ = fc_layer(shortcut, num_hidden, keep_prob, 'fc_2', regularizer=regularizer)

    with tf.name_scope("output"):
        W = tf.get_variable("W_output",
                            shape=[num_hidden, 2],
                            dtype=tf.float32,
                            initializer=tf.contrib.layers.xavier_initializer(),
                            regularizer=regularizer)
        b = tf.get_variable("b_output",
                            shape=[2],
                            dtype=tf.float32,
                            initializer=tf.constant_initializer(0.1),
                            regularizer=regularizer)
        self.scores = tf.nn.xw_plus_b(self.full_out, W, b, name="scores")
        self.predictions = tf.argmax(self.scores, 1, name="predictions")

    # Mean cross-entropy loss plus attention penalty and L2 terms
    with tf.name_scope("loss"):
        losses = tf.nn.softmax_cross_entropy_with_logits(logits=self.scores, labels=self.input_y)
        penalised_loss = tf.reduce_mean(losses) + mu * self.penalty
        regularization = sum(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
        self.loss = penalised_loss + regularization

    # Accuracy
    with tf.name_scope("accuracy"):
        correct_predictions = tf.equal(self.predictions, tf.argmax(self.input_y, 1))
        self.accuracy = tf.reduce_mean(tf.cast(correct_predictions, tf.float32), name="accuracy")
def test(self, test_list, modelpath, num_test_clips=3783):
    """Evaluate the restored C3D network on a list of test clips.

    Builds the C3D network on ``self.inputs``, restores the trainable
    variables from ``modelpath + "sports1m_finetuning_ucf101.model"`` and
    prints the accuracy over ``num_test_clips // self.batch_size`` full
    batches read sequentially from ``test_list``.

    Args:
        test_list: forwarded as ``filename`` to ``read_clip_and_label``.
        modelpath: directory prefix of the checkpoint.  It is concatenated
            as-is, so it must end with a path separator.
        num_test_clips: number of clips available in ``test_list``.
            Defaults to 3783, the value previously hard-coded here.
    """
    with self.graph.as_default():
        c3d_net = [
            ["conv", "conv1", [3, 3, 3, 3, 64], 'wc1', 'bc1'],
            ["maxpool", "pool1", [1, 1, 2, 2, 1]],
            ["conv", "conv2", [3, 3, 3, 64, 128], 'wc2', 'bc2'],
            ["maxpool", "pool2", [1, 2, 2, 2, 1]],
            ["conv", "conv3a", [3, 3, 3, 128, 256], 'wc3a', 'bc3a'],
            ["conv", "conv3b", [3, 3, 3, 256, 256], 'wc3b', 'bc3b'],
            ["maxpool", "pool3", [1, 2, 2, 2, 1]],
            ["conv", "conv4a", [3, 3, 3, 256, 512], 'wc4a', 'bc4a'],
            ["conv", "conv4b", [3, 3, 3, 512, 512], 'wc4b', 'bc4b'],
            ["maxpool", "pool4", [1, 2, 2, 2, 1]],
            ["conv", "conv5a", [3, 3, 3, 512, 512], 'wc5a', 'bc5a'],
            ["conv", "conv5b", [3, 3, 3, 512, 512], 'wc5b', 'bc5b'],
            ["maxpool", "pool5", [1, 2, 2, 2, 1]],
            # only use it if you restore the sports1m_finetuning_ucf101.model,
            # otherwise uncomment it, (e.g use
            # conv3d_deepnetA_sport1m_iter_1900000_TF.model)
            ["transpose", [0, 1, 4, 2, 3]],
            ["reshape", [-1, 8192]],
            ["fc", "fc1", [8192, 4096], 'wd1', 'bd1', True],
            ["dropout", "dropout1", self.keep_prob],
            ["fc", "fc2", [4096, 4096], 'wd2', 'bd2', True],
            ["dropout", "dropout2", self.keep_prob],
            ["fc", "fc3", [4096, self.num_class], 'wout', 'bout', False],
        ]

        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        config.gpu_options.per_process_gpu_memory_fraction = 0.9

        with tf.Session(config=config, graph=self.graph) as sess:
            logits = self.parseNet(self.inputs, c3d_net)
            softmax_logits = tf.nn.softmax(logits)
            int_label = self.labels
            # Number of correctly classified clips in the fed batch.  No
            # loss or optimizer is built: evaluation never runs them.
            right_count = tf.reduce_sum(
                tf.cast(tf.equal(tf.argmax(softmax_logits, axis=1), int_label),
                        tf.int32))

            total_para = np.sum([
                np.prod(v.get_shape().as_list())
                for v in tf.trainable_variables()
            ])
            print('total_para:', total_para)

            # Initialise everything first so that variables missing from
            # the checkpoint still hold a value, then restore the weights.
            sess.run(tf.global_variables_initializer())
            saver = tf.train.Saver(tf.trainable_variables())
            saver.restore(sess, modelpath + "sports1m_finetuning_ucf101.model")
            print("Model Loading Done!")

            next_start_pos = 0
            total_v = 0.0
            test_correct_num = 0
            for _ in tqdm(range(num_test_clips // self.batch_size)):
                total_v += self.batch_size
                train_batch, label_batch, next_start_pos, _, _ = read_clip_and_label(
                    filename=test_list,
                    batch_size=self.batch_size,
                    num_frames_per_clip=self.CLIP_LENGTH,
                    height=self.IMG_HEIGHT,
                    width=self.IMG_WIDTH,
                    start_pos=next_start_pos,
                    shuffle=False)
                assert len(train_batch) == self.batch_size
                train_batch = train_aug(train_batch,
                                        is_train=False,
                                        Crop_heith=self.CROP_HEIGHT,
                                        Crop_width=self.CROP_WIDTH,
                                        norm=True)
                val_feed = {self.inputs: train_batch, self.labels: label_batch}
                test_correct_num += sess.run(right_count, val_feed)

            # No full batch fits when batch_size > num_test_clips; report an
            # accuracy of 0.0 rather than dividing by zero.
            test_acc = test_correct_num / total_v if total_v else 0.0
            print('test acc:', test_acc, 'test_correct_num:',
                  test_correct_num, 'total_v:', total_v)
from FLAGS import *

# Report the configured HDML switch, then force it off for this script.
print("applyHDML:{}".format(FLAGS.Apply_HDML))
FLAGS.Apply_HDML = False
print("HDML is true" if FLAGS.Apply_HDML else "HDML is false")

# Create the data streams (train / train-eval / test) for the chosen dataset.
streams = data_provider.get_streams_noFlip(
    FLAGS.batch_size,
    FLAGS.dataSet,
    method,
    crop_size=FLAGS.default_image_size,
)
stream_train, stream_train_eval, stream_test = streams

# L2 weight regularizer shared by the model-building code.
regularizer = layers.l2_regularizer(FLAGS.Regular_factor)

# Local-time stamp (month-day-hour-minute) used to give each run its own
# log directory: <log_save_path><dataSet>/<LossType>/<stamp>/
_time = time.strftime('%m-%d-%H-%M', time.localtime())
LOGDIR = ''.join([
    FLAGS.log_save_path,
    FLAGS.dataSet,
    '/',
    FLAGS.LossType,
    '/',
    _time,
    '/',
])

if FLAGS.SaveVal:
    nn_Ops.create_path(_time)
summary_writer = tf.summary.FileWriter(LOGDIR)


def main(_):
    """Entry point; refuses to run unless the loss type is 'NpairLoss'.

    Returns 0 after printing a message when ``FLAGS.LossType`` is anything
    other than 'NpairLoss'.
    """
    if FLAGS.LossType != 'NpairLoss':
        print("LossType n-pair-loss is required")
        return 0
# training_step counts how many training steps have been performed so far;
# in a real training loop this value grows as training proceeds.
training_step = tf.Variable(0)

# Learning-rate schedule built with exponential_decay(), using training_step
# as the global step.
decayed_learning_rate = tf.train.exponential_decay(
    learning_rate=0.8,
    global_step=training_step,
    decay_steps=100,
    decay_rate=0.9,
    staircase=True,
)

# A gradient-descent optimizer minimizing `loss` with the decayed learning
# rate would go here; it is left disabled because no loss is defined:
# learning_step = tf.train.GradientDescentOptimizer(decayed_learning_rate)\
#     .minimize(loss, global_step=training_step)

# Other inputs that can be tried:
# weights = tf.constant([[1.0, 2.0], [-3.0, -4.0]])
# weights = tf.constant([3.0, 4.0])
weights = tf.constant([3.0])

# L1 and L2 norm penalties, each with scale 0.5.
regularizer_l1 = layers.l1_regularizer(0.5)
regularizer_l2 = layers.l2_regularizer(0.5)

# Evaluate and print the L1 penalty first, then the L2 penalty.
with tf.Session() as sess:
    for penalty_fn in (regularizer_l1, regularizer_l2):
        print(sess.run(penalty_fn(weights)))
def rendering_Net(inputs, masks, height, width, n_layers=12, n_pools=2,
                  is_training=True, depth_base=64):
    """Build a conv/deconv (encoder-decoder) network and return its output.

    The encoder is a stack of ``conv2d`` layers with up to ``n_pools`` 2x2
    max-pool steps; the feature map saved before each pooled layer is reused
    by the decoder as a skip connection. The decoder mirrors the encoder,
    bilinearly resizing back to the saved feature-map sizes. The result is
    passed through a sigmoid and clipped to [1e-4, 0.9999].

    Args:
        inputs: input tensor fed to the first conv layer (the first layer is
            declared with 3 input channels).
        masks: not used in this function body.
        height: not used in this function body.
        width: not used in this function body.
        n_layers: total layer budget; ``n_layers/2 - 1`` conv layers (plus the
            input layer) and ``n_layers/2`` deconv layers are created.
        n_pools: number of pooling (and matching upsampling) steps.
        is_training: not used in this function body.
        depth_base: channel count at the first resolution; doubled per pooling
            stage and capped via ``max_depth`` (512).

    Returns:
        ``tf.clip_by_value(tf.nn.sigmoid(last_layer), 1e-4, .9999)``.

    NOTE(review): ``conv2d(x, scope, f_in, f_out)`` is a helper defined
    elsewhere in this file; its exact behavior is not visible from here.
    """
    conv_layers = np.int32(n_layers / 2) - 1
    deconv_layers = np.int32(n_layers / 2)
    # number of layers before perform pooling
    nlayers_befPool = np.int32(np.ceil((conv_layers - 1) / n_pools) - 1)

    max_depth = 512
    if depth_base * 2**n_pools < max_depth:
        # Depth never reaches the cap, so the number of channel doublings is
        # simply n_pools. (Previously maxNum_pool was only assigned in the
        # else-branch, so this path -- which the default arguments take --
        # raised UnboundLocalError when the filter lists were built below.)
        maxNum_pool = n_pools
        tail = conv_layers - nlayers_befPool * n_pools
    else:
        # Depth is capped at max_depth: only log2(max_depth / depth_base)
        # doublings are possible.
        maxNum_pool = np.log2(max_depth / depth_base)
        tail = np.int32(conv_layers - nlayers_befPool * maxNum_pool)

    # Channel counts of the layers that come after the last depth doubling.
    tail_depth = np.int32(depth_base * 2**maxNum_pool)
    tail_range = range(conv_layers - tail + 1, conv_layers + 1)

    # Per-layer input / output channel counts for the encoder.
    f_in_conv = [3] + [
        np.int32(depth_base * 2**(np.ceil(i / nlayers_befPool) - 1))
        for i in range(1, conv_layers - tail + 1)
    ] + [tail_depth for _ in tail_range]
    f_out_conv = [64] + [
        np.int32(depth_base * 2**(np.floor(i / nlayers_befPool)))
        for i in range(1, conv_layers - tail + 1)
    ] + [tail_depth for _ in tail_range]

    # The decoder mirrors the encoder; its last layer outputs 3 channels.
    f_in_deconv = f_out_conv[:0:-1] + [64]
    f_out_amDeconv = f_in_conv[:0:-1] + [3]

    ### contractive conv_layer block
    conv_out = inputs
    conv_out_list = []
    for i, f_in, f_out in zip(range(1, conv_layers + 2), f_in_conv,
                              f_out_conv):
        scope = 'generator/conv' + str(i)

        pool_here = (np.mod(i - 1, nlayers_befPool) == 0
                     and i <= n_pools * nlayers_befPool + 1
                     and i != 1)
        if pool_here:
            # Remember the pre-pooling features for the decoder skip link.
            conv_out_list.append(conv_out)
            conv_out = conv2d(conv_out, scope, f_in, f_out)
            conv_out = tf.nn.max_pool(conv_out,
                                      ksize=[1, 2, 2, 1],
                                      strides=[1, 2, 2, 1],
                                      padding='SAME')
        else:
            conv_out = conv2d(conv_out, scope, f_in, f_out)

    ### expanding deconv_layer block succeeding conv_layer block
    deconv_out = conv_out
    for i, f_in, f_out in zip(range(1, deconv_layers + 1), f_in_deconv,
                              f_out_amDeconv):
        scope = 'generator/deconv' + str(i)

        # expand resolution every after nlayers_befPool deconv_layer
        if np.mod(i, nlayers_befPool) == 0 and i <= n_pools * nlayers_befPool:
            # Skip features are consumed most-recent first.
            tmp = conv_out_list[-np.int32(i / nlayers_befPool)]
            deconv_out = conv2d(
                tf.image.resize_bilinear(deconv_out, tmp.shape[1:3]),
                scope, f_in, f_out)
            tmp = conv2d(tmp, scope + '/concat', f_in, f_out)
            deconv_out = tmp + deconv_out
        elif i == deconv_layers:
            # Final layer: plain 3x3 convolution without normalizer or
            # activation; the sigmoid is applied on return.
            deconv_out = layers.conv2d(
                deconv_out,
                num_outputs=f_out,
                kernel_size=[3, 3],
                stride=[1, 1],
                padding='SAME',
                normalizer_fn=None,
                activation_fn=None,
                weights_initializer=tf.random_normal_initializer(
                    mean=0, stddev=np.sqrt(2 / 9 / f_in)),
                weights_regularizer=layers.l2_regularizer(scale=1e-5),
                scope=scope)
        else:
            # layers that not expand spatial resolution
            deconv_out = conv2d(deconv_out, scope, f_in, f_out)

    return tf.clip_by_value(tf.nn.sigmoid(deconv_out), 1e-4, .9999)
def concat2pred(self, arg1, arg2):
    """Predict class probabilities from the concatenation of two encodings.

    Computes ``softmax(concat(arg1, arg2) . W)`` inside the
    "concat2predict_layer" variable scope.

    Args:
        arg1: 2D tensor of shape (batch_size, hidden_units).
        arg2: 2D tensor of shape (batch_size, hidden_units).

    Returns:
        pred: softmax output of shape (batch_size, num_classes).
    """
    # The class count used to be hard-coded to 3, unlike weighted2pred which
    # reads self.config.num_classes. Use the configured value and fall back to
    # 3 so configs without that attribute behave as before.
    num_classes = getattr(self.config, "num_classes", 3)
    with tf.variable_scope("concat2predict_layer"):
        h_predict = tf.concat([arg1, arg2], axis=1, name="h_predict")
        W_pred = tf.get_variable(
            "concat_W_pred",
            shape=[2 * self.config.hidden_units, num_classes],
            regularizer=l2_regularizer(self.config.l2_strength))
        pred = tf.nn.softmax(tf.matmul(h_predict, W_pred), name="pred")
    return pred
def weight_variable(name, shape, regularization=None):
    """Create (or fetch) a Xavier-initialized variable via tf.get_variable.

    Args:
        name: variable name passed to ``tf.get_variable``.
        shape: variable shape.
        regularization: acts purely as a switch. Any value other than None
            attaches an L2 regularizer with a fixed scale of 1e-5; the value
            itself is not used.

    Returns:
        The variable returned by ``tf.get_variable``.
    """
    l2_penalty = l2_regularizer(1e-5) if regularization is not None else None
    return tf.get_variable(
        name,
        shape=shape,
        initializer=xavier_initializer(),
        regularizer=l2_penalty,
    )
pi1 = tf.Variable(tf.zeros([inputSize]), trainable=True) pi2 = tf.Variable(tf.zeros([inputSize]), trainable=True) pi3 = tf.Variable(tf.zeros([inputSize]), trainable=True) E1 = tf.nn.sigmoid(tf.matmul(X, V1) + mu1) E2 = tf.nn.sigmoid(tf.add(tf.matmul(E1, V2), mu2)) E3 = tf.nn.sigmoid(tf.add(tf.matmul(E2, V3), mu3)) YS1 = tf.multiply(tf.identity(tf.add(tf.matmul(E1, S1), pi1)), mapping) YS2 = tf.multiply(tf.identity(tf.add(tf.matmul(E2, S2), pi2)), mapping) YS3 = tf.multiply(tf.identity(tf.add(tf.matmul(E3, S3), pi3)), mapping) Ypool = (YS1 + YS2 + YS3) / 3 regularize = layers.apply_regularization(layers.l2_regularizer(scale=lambdaR), weights_list=[V1, V2, V3, S1, S2, S3]) difference1NM = X - YS1 difference2NM = X - YS2 difference3NM = X - YS3 differencePool = X - Ypool Loss1NM = tf.reduce_sum(tf.square(difference1NM)) Loss2NM = tf.reduce_sum(tf.square(difference2NM)) Loss3NM = tf.reduce_sum(tf.square(difference3NM)) LossPool = tf.reduce_sum(tf.square(differencePool)) loss = Loss1NM + Loss2NM + Loss3NM + LossPool + regularize optimizer = layers.optimize_loss(loss=loss,