def make_encoder(activation, num_topics, layer_sizes): """Create the encoder function. Args: activation: Activation function to use. num_topics: The number of topics. layer_sizes: The number of hidden units per layer in the encoder. Returns: encoder: A `callable` mapping a bag-of-words `Tensor` to a `tf.distributions.Distribution` instance over topics. """ encoder_net = tf.keras.Sequential() for num_hidden_units in layer_sizes: encoder_net.add(tf.keras.layers.Dense( num_hidden_units, activation=activation, kernel_initializer=tf.glorot_normal_initializer())) encoder_net.add(tf.keras.layers.Dense( num_topics, activation=tf.nn.softplus, kernel_initializer=tf.glorot_normal_initializer())) def encoder(bag_of_words): net = _clip_dirichlet_parameters(encoder_net(bag_of_words)) return tfd.Dirichlet(concentration=net, name="topics_posterior") return encoder
def make_lda_variational(activation, num_topics, layer_sizes): """Creates the variational distribution for LDA. Args: activation: Activation function to use. num_topics: The number of topics. layer_sizes: The number of hidden units per layer in the encoder. Returns: lda_variational: A function that takes a bag-of-words Tensor as input and returns a distribution over topics. """ encoder_net = tf.keras.Sequential() for num_hidden_units in layer_sizes: encoder_net.add(tf.keras.layers.Dense( num_hidden_units, activation=activation, kernel_initializer=tf.glorot_normal_initializer())) encoder_net.add(tf.keras.layers.Dense( num_topics, activation=tf.nn.softplus, kernel_initializer=tf.glorot_normal_initializer())) def lda_variational(bag_of_words): concentration = _clip_dirichlet_parameters(encoder_net(bag_of_words)) return ed.Dirichlet(concentration=concentration, name="topics_posterior") return lda_variational
def make_decoder(num_topics, num_words): """Create the decoder function. Args: num_topics: The number of topics. num_words: The number of words. Returns: decoder: A `callable` mapping a `Tensor` of encodings to a `tf.distributions.Distribution` instance over words. """ topics_words_logits = tf.get_variable( "topics_words_logits", shape=[num_topics, num_words], initializer=tf.glorot_normal_initializer()) topics_words = tf.nn.softmax(topics_words_logits, axis=-1) def decoder(topics): word_probs = tf.matmul(topics, topics_words) # The observations are bag of words and therefore not one-hot. However, # log_prob of OneHotCategorical computes the probability correctly in # this case. return tfd.OneHotCategorical(probs=word_probs, name="bag_of_words") return decoder, topics_words
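# A minimal NumPy sketch (not part of the original code) illustrating the comment in the decoder above:
# for a bag-of-words count vector, sum_i counts_i * log(probs_i) -- which is what
# OneHotCategorical.log_prob effectively computes on a non-one-hot input -- matches the sum of
# per-token categorical log-probabilities. All names and values here are illustrative only.
import numpy as np

word_probs = np.array([0.5, 0.3, 0.2])   # one document's mixed topic-word probabilities
counts = np.array([2.0, 0.0, 1.0])       # bag-of-words counts over the 3-word vocabulary
tokens = [0, 0, 2]                       # the same document as a token sequence
log_prob_from_counts = np.dot(counts, np.log(word_probs))
log_prob_from_tokens = sum(np.log(word_probs[t]) for t in tokens)
assert np.isclose(log_prob_from_counts, log_prob_from_tokens)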
def model_fn(features, labels, mode, params):
    """Build Model function f(x) for Estimator."""
    #------hyper parameters------
    field_size = params['field_size']
    feature_size = params['feature_size']
    embedding_size = params['embedding_size']
    l2_reg = params['l2_reg']
    learning_rate = params['learning_rate']
    dropout = params['dropout']
    attention_factor = params['attention_factor']

    #------build weights------
    Global_Bias = tf.get_variable("bias", shape=[1], initializer=tf.constant_initializer(0.0))
    Feat_Wgts = tf.get_variable("linear", shape=[feature_size], initializer=tf.glorot_normal_initializer())
    Feat_Emb = tf.get_variable("emb", shape=[feature_size, embedding_size], initializer=tf.glorot_normal_initializer())

    #------build feature------
    feat_ids = features['feat_ids']
    feat_vals = features['feat_vals']
    feat_ids = tf.reshape(feat_ids, shape=[-1, field_size])
    feat_vals = tf.reshape(feat_vals, shape=[-1, field_size])  # None * F

    #------build f(x)------
    # FM part: sum(wx)
    with tf.variable_scope("Linear-part"):
        feat_wgts = tf.nn.embedding_lookup(Feat_Wgts, feat_ids)  # None * F * 1
        y_linear = tf.reduce_sum(tf.multiply(feat_wgts, feat_vals), 1)

    # Deep part
    with tf.variable_scope("Embedding_Layer"):
        embeddings = tf.nn.embedding_lookup(Feat_Emb, feat_ids)  # None * F * K
        feat_vals = tf.reshape(feat_vals, shape=[-1, field_size, 1])  # None * F * 1
        embeddings = tf.multiply(embeddings, feat_vals)  # None * F * K

    with tf.variable_scope("Pair-wise_Interaction_Layer"):
        num_interactions = field_size * (field_size - 1) / 2
        element_wise_product_list = []
        for i in range(0, field_size):
            for j in range(i + 1, field_size):
                element_wise_product_list.append(
                    tf.multiply(embeddings[:, i, :], embeddings[:, j, :]))
        element_wise_product_list = tf.stack(element_wise_product_list)  # (F*(F-1)/2) * None * K; tf.stack concatenates the matrices
        element_wise_product_list = tf.transpose(element_wise_product_list, perm=[1, 0, 2])  # None * (F(F-1)/2) * K

    # Compute the attention scores
    with tf.variable_scope("Attention_Network"):
        deep_inputs = tf.reshape(element_wise_product_list, shape=[-1, embedding_size])  # (None*F(F-1)/2) * K
        deep_inputs = contrib.layers.fully_connected(inputs=deep_inputs, num_outputs=attention_factor, activation_fn=tf.nn.relu,
                                                     weights_regularizer=contrib.layers.l2_regularizer(l2_reg), scope="attention_net_mlp")
        aij = contrib.layers.fully_connected(inputs=deep_inputs, num_outputs=1, activation_fn=tf.identity,
                                             weights_regularizer=contrib.layers.l2_regularizer(l2_reg), scope="attention_net_out")  # (None*F(F-1)/2) * 1

        # Normalize the attention scores with a softmax
        aij = tf.reshape(aij, shape=[-1, int(num_interactions), 1])
        aij_softmax = tf.nn.softmax(aij, dim=1, name="attention_net_softout")  # None * num_interactions

        # TODO: why apply dropout to the attention scores? This part is not entirely clear.
        if mode == tf.estimator.ModeKeys.TRAIN:
            aij_softmax = tf.nn.dropout(aij_softmax, keep_prob=dropout[0])

    with tf.variable_scope("Attention-based_Pooling_Layer"):
        deep_inputs = tf.multiply(element_wise_product_list, aij_softmax)  # None * (F(F-1)/2) * K
        deep_inputs = tf.reduce_sum(deep_inputs, axis=1)  # None * K, pooling operation

        # The output of the attention-based pooling layer also goes through dropout
        if mode == tf.estimator.ModeKeys.TRAIN:
            deep_inputs = tf.nn.dropout(deep_inputs, keep_prob=dropout[1])

    # The output of this layer is a K-dimensional vector
    with tf.variable_scope("Prediction_Layer"):
        # Followed directly by the output unit
        deep_inputs = contrib.layers.fully_connected(inputs=deep_inputs, num_outputs=1, activation_fn=tf.identity,
                                                     weights_regularizer=contrib.layers.l2_regularizer(l2_reg), scope="afm_out")  # None * 1
        y_deep = tf.reshape(deep_inputs, shape=[-1])  # None

    with tf.variable_scope("AFM_overall"):
        y_bias = Global_Bias * tf.ones_like(y_deep, dtype=tf.float32)
        y = y_bias + y_linear + y_deep
        pred = tf.nn.sigmoid(y)

    # set predictions
    predictions = {"prob": pred}
    export_outputs = {
        tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY:
            tf.estimator.export.PredictOutput(predictions)
    }

    # Provide an estimator spec for `ModeKeys.PREDICT`
    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions, export_outputs=export_outputs)

    #------build loss------
    loss = tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(logits=y, labels=labels)
    ) + l2_reg * tf.nn.l2_loss(Feat_Wgts) + l2_reg * tf.nn.l2_loss(Feat_Emb)
    log_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=y, labels=labels))

    # Provide an estimator spec for `ModeKeys.EVAL`
    eval_metric_ops = {
        # "logloss": tf.losses.log_loss(pred, labels, weights=1.0, scope=None, epsilon=1e-07, loss_collection=tf.GraphKeys.LOSSES, reduction=tf.losses.Reduction.SUM_BY_NONZERO_WEIGHTS),
        "auc": tf.metrics.auc(labels, pred),
    }
    if mode == tf.estimator.ModeKeys.EVAL:
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions, loss=loss, eval_metric_ops=eval_metric_ops)

    #------build optimizer------
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate, beta1=0.9, beta2=0.999, epsilon=1e-8)
    train_op = optimizer.minimize(loss, global_step=tf.train.get_global_step())

    # Provide an estimator spec for `ModeKeys.TRAIN`
    if mode == tf.estimator.ModeKeys.TRAIN:
        return tf.estimator.EstimatorSpec(
            mode=mode,
            predictions=predictions,
            loss=log_loss,  # Report only the pure log loss; training still uses the full regularized loss
            train_op=train_op)
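# A small NumPy sketch (assumed shapes, not from the original project) of the pair-wise
# interaction and attention-based pooling used above: F fields produce F*(F-1)/2 element-wise
# products of K-dim embeddings, and a softmax-weighted sum pools them back to one K-dim vector.
import numpy as np

F, K = 4, 3
emb = np.random.randn(F, K)                          # one example's field embeddings
pairs = [emb[i] * emb[j] for i in range(F) for j in range(i + 1, F)]
pairs = np.stack(pairs)                              # (F*(F-1)/2, K)
assert pairs.shape[0] == F * (F - 1) // 2
scores = np.random.randn(pairs.shape[0], 1)          # attention logits, one per interaction
attn = np.exp(scores) / np.exp(scores).sum(axis=0)   # softmax over the interaction axis
pooled = (attn * pairs).sum(axis=0)                  # attention-weighted pooling, shape (K,)
assert pooled.shape == (K,)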
def __init__(self, config, vocab_size): # Placeholders for data, output and dropout self.config = config self.input_x1 = tf.placeholder(tf.int32, [None, self.config.max_document_length], name="input_x1") self.input_x2 = tf.placeholder(tf.int32, [None, self.config.max_document_length], name="input_x2") self.input_y = tf.placeholder(tf.float32, [None], name="input_y") self.dropout_keep_prob = tf.placeholder(tf.float32, name="dropout_keep_prob") self.attention_w = tf.Variable(tf.truncated_normal( [2 * self.config.hidden_units, self.config.attention_size], stddev=0.1), name='attention_w') self.attention_b = tf.Variable(tf.constant( 0.1, shape=[self.config.attention_size]), name='attention_b') self.u_w = tf.Variable(tf.truncated_normal( [self.config.attention_size, 1]), name='attention_uw') self.initializer = None if self.config.initializer == "normal": self.initializer = tf.random_normal_initializer(mean=0.0, stddev=0.1) elif self.config.initializer == "glorot": self.initializer = tf.glorot_uniform_initializer() elif self.config.initializer == "xavier": self.initializer = tf.glorot_normal_initializer() else: raise ValueError("Unknown initializer") # Embedding layer with tf.name_scope("embedding"): self.W = tf.get_variable( 'lookup_table', dtype=tf.float32, shape=[vocab_size, self.config.embedding_dim], initializer=self.initializer, trainable=True) self.embedded_chars1 = tf.nn.embedding_lookup( self.W, self.input_x1) self.embedded_chars2 = tf.nn.embedding_lookup( self.W, self.input_x2) with tf.name_scope("output"): # add cnn layer output1 = self.cnn_layer(self.embedded_chars1, "side1") output2 = self.cnn_layer(self.embedded_chars2, "side2") self.out1 = self.BiRNN(output1, self.dropout_keep_prob, "side1", self.config.max_document_length, self.config.hidden_units) self.out1 = self._highway_layer(self.out1, self.out1.get_shape()[1], num_layers=1, bias=0, scope="side1") self.out2 = self.BiRNN(output2, self.dropout_keep_prob, "side2", self.config.max_document_length, self.config.hidden_units) self.out2 = self._highway_layer(self.out2, self.out2.get_shape()[1], num_layers=1, bias=0, scope="side2") self.distance = tf.sqrt( tf.reduce_sum(tf.square(tf.subtract(self.out1, self.out2)), 1, keepdims=True)) self.distance = tf.div( self.distance, tf.add( tf.sqrt( tf.reduce_sum(tf.square(self.out1), 1, keepdims=True)), tf.sqrt( tf.reduce_sum(tf.square(self.out2), 1, keepdims=True)))) self.distance = tf.reshape(self.distance, [-1], name="distance") with tf.name_scope("loss"): self.loss = self.contrastive_loss(self.input_y, self.distance, self.config.batch_size) with tf.name_scope("accuracy"): self.temp_sim = tf.subtract(tf.ones_like(self.distance), tf.round(self.distance), name="temp_sim") # auto threshold 0.4 self.correct_predictions = tf.equal(self.temp_sim, self.input_y) self.accuracy = tf.reduce_mean(tf.cast(self.correct_predictions, "float"), name="accuracy")
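# A brief NumPy sketch (illustrative only) of the distance used above: the Euclidean distance
# between the two branch outputs is divided by the sum of their norms, which by the triangle
# inequality keeps it in [0, 1]; rounding it then gives the 0/1 prediction compared to input_y.
import numpy as np

o1, o2 = np.random.randn(5), np.random.randn(5)
d = np.linalg.norm(o1 - o2) / (np.linalg.norm(o1) + np.linalg.norm(o2))
assert 0.0 <= d <= 1.0
pred_similar = 1.0 - np.round(d)   # 1 = similar, 0 = dissimilar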
def masked_dense(inputs, units, num_blocks=None, exclusive=False, kernel_initializer=None, reuse=None, name=None, *args, # pylint: disable=keyword-arg-before-vararg **kwargs): """A autoregressively masked dense layer. Analogous to `tf.layers.dense`. See [Germain et al. (2015)][1] for detailed explanation. Arguments: inputs: Tensor input. units: Python `int` scalar representing the dimensionality of the output space. num_blocks: Python `int` scalar representing the number of blocks for the MADE masks. exclusive: Python `bool` scalar representing whether to zero the diagonal of the mask, used for the first layer of a MADE. kernel_initializer: Initializer function for the weight matrix. If `None` (default), weights are initialized using the `tf.glorot_random_initializer`. reuse: Python `bool` scalar representing whether to reuse the weights of a previous layer by the same name. name: Python `str` used to describe ops managed by this function. *args: `tf.layers.dense` arguments. **kwargs: `tf.layers.dense` keyword arguments. Returns: Output tensor. Raises: NotImplementedError: if rightmost dimension of `inputs` is unknown prior to graph execution. #### References [1]: Mathieu Germain, Karol Gregor, Iain Murray, and Hugo Larochelle. MADE: Masked Autoencoder for Distribution Estimation. In _International Conference on Machine Learning_, 2015. https://arxiv.org/abs/1502.03509 """ # TODO(b/67594795): Better support of dynamic shape. input_depth = tf.dimension_value(inputs.shape.with_rank_at_least(1)[-1]) if input_depth is None: raise NotImplementedError( "Rightmost dimension must be known prior to graph execution.") mask = _gen_mask(num_blocks, input_depth, units, MASK_EXCLUSIVE if exclusive else MASK_INCLUSIVE).T if kernel_initializer is None: kernel_initializer = tf.glorot_normal_initializer() def masked_initializer(shape, dtype=None, partition_info=None): return mask * kernel_initializer(shape, dtype, partition_info) with tf.name_scope(name, "masked_dense", [inputs, units, num_blocks]): layer = tf.layers.Dense( units, kernel_initializer=masked_initializer, kernel_constraint=lambda x: mask * x, name=name, dtype=inputs.dtype.base_dtype, _scope=name, _reuse=reuse, *args, # pylint: disable=keyword-arg-before-vararg **kwargs) return layer.apply(inputs)
def fcn_paper(inputs_32s, inputs_16s, inputs_8s, img_height, img_width, is_training=True): #inputs: [batch,h,w,channels]. And can be (1, any_size, any_size, channels). #logits: [batch,h,w,classes] #upsampled_logits: [batch,H,W,classes] #annotation: [batch,H,W] #loss: [batch,H,W] #((11, 15, 512), (23, 31, 512), (46, 62, 256), (375, 500), b'2007_000645', '2007_000645') with tf.variable_scope('fcn/logits/32s') as scope: weights = tf.get_variable('weights', shape=[1, 1, 4096, num_classes], dtype=tf.float32, initializer=tf.glorot_normal_initializer()) biases = tf.get_variable('biases', shape=[num_classes], dtype=tf.float32, initializer=tf.constant_initializer(0.0)) conv = tf.nn.conv2d(inputs_32s, weights, strides=[1, 1, 1, 1], padding='SAME') logits_32s = tf.nn.bias_add(conv, biases) convt_weights = get_deconv_filter([4, 4, num_classes, num_classes]) inputs_16s_shape = tf.shape(inputs_16s) logits_32s_upsampled = tf.nn.conv2d_transpose(value=logits_32s, filter=convt_weights, output_shape=[ inputs_16s_shape[0], inputs_16s_shape[1], inputs_16s_shape[2], num_classes ], strides=[1, 2, 2, 1], padding='SAME', data_format='NHWC', name=None) with tf.variable_scope('fcn/logits/16s') as scope: weights = tf.get_variable('weights', shape=[1, 1, 512, num_classes], dtype=tf.float32, initializer=tf.glorot_normal_initializer()) biases = tf.get_variable('biases', shape=[num_classes], dtype=tf.float32, initializer=tf.constant_initializer(0.0)) conv = tf.nn.conv2d(inputs_16s, weights, strides=[1, 1, 1, 1], padding='SAME') logits_16s = tf.nn.bias_add(conv, biases) fused_logits_16s = logits_16s + logits_32s_upsampled convt_weights = get_deconv_filter([4, 4, num_classes, num_classes]) inputs_8s_shape = tf.shape(inputs_8s) logits_16s_upsampled = tf.nn.conv2d_transpose(value=fused_logits_16s, filter=convt_weights, output_shape=[ inputs_8s_shape[0], inputs_8s_shape[1], inputs_8s_shape[2], num_classes ], strides=[1, 2, 2, 1], padding='SAME', data_format='NHWC', name=None) with tf.variable_scope('fcn/logits/8s') as scope: weights = tf.get_variable('weights', shape=[1, 1, 256, num_classes], dtype=tf.float32, initializer=tf.glorot_normal_initializer()) biases = tf.get_variable('biases', shape=[num_classes], dtype=tf.float32, initializer=tf.constant_initializer(0.0)) conv = tf.nn.conv2d(inputs_8s, weights, strides=[1, 1, 1, 1], padding='SAME') logits_8s = tf.nn.bias_add(conv, biases) fused_logits_8s = logits_8s + logits_16s_upsampled convt_weights = get_deconv_filter([16, 16, num_classes, num_classes]) logits_8s_upsampled = tf.nn.conv2d_transpose(value=fused_logits_8s, filter=convt_weights, output_shape=[ inputs_8s_shape[0], img_height, img_width, num_classes ], strides=[1, 8, 8, 1], padding='SAME', data_format='NHWC', name=None) return logits_8s_upsampled
def model_fn(features, labels, mode, params):
    """Build Model function f(x) for Estimator."""
    #------hyperparameters----
    field_size = params["field_size"]
    feature_size = params["feature_size"]
    embedding_size = params["embedding_size"]
    l2_reg = params["l2_reg"]
    learning_rate = params["learning_rate"]
    #batch_norm_decay = params["batch_norm_decay"]
    #optimizer = params["optimizer"]
    layers = list(map(int, params["deep_layers"].split(',')))
    dropout = list(map(float, params["dropout"].split(',')))

    #------build weights------
    FM_B = tf.get_variable(name='fm_bias', shape=[1], initializer=tf.constant_initializer(0.0))
    print("FM_B", FM_B.get_shape())
    FM_W = tf.get_variable(name='fm_w', shape=[feature_size], initializer=tf.glorot_normal_initializer())
    print("FM_W", FM_W.get_shape())  # F
    FM_V = tf.get_variable(name='fm_v', shape=[feature_size, embedding_size], initializer=tf.glorot_normal_initializer())  # F * E
    print("FM_V", FM_V.get_shape())

    #------build feature-------
    feat_ids = features['feat_ids']
    print("feat_ids", feat_ids.get_shape())
    feat_ids = tf.reshape(feat_ids, shape=[-1, field_size])  # None * f/K * K
    print("feat_ids", feat_ids.get_shape())
    feat_vals = features['feat_vals']
    print("feat_vals", feat_vals.get_shape())
    feat_vals = tf.reshape(feat_vals, shape=[-1, field_size])  # None * f/K * K
    print("feat_vals", feat_vals.get_shape())

    #------build f(x)------
    with tf.variable_scope("First-order"):
        feat_wgts = tf.nn.embedding_lookup(FM_W, feat_ids)  # None * f/K * K
        print("feat_wgts", feat_wgts.get_shape())
        y_w = tf.reduce_sum(tf.multiply(feat_wgts, feat_vals), 1)

    with tf.variable_scope("Second-order"):
        embeddings = tf.nn.embedding_lookup(FM_V, feat_ids)  # None * f/K * K * E
        print("embeddings", embeddings.get_shape())
        feat_vals = tf.reshape(feat_vals, shape=[-1, field_size, 1])  # None * f/K * K * 1 ?
        print("feat_vals", feat_vals.get_shape())
        embeddings = tf.multiply(embeddings, feat_vals)  # vij*xi
        print("embeddings", embeddings.get_shape())
        sum_square = tf.square(tf.reduce_sum(embeddings, 1))  # None * K * E
        print("sum_square", sum_square.get_shape())
        square_sum = tf.reduce_sum(tf.square(embeddings), 1)
        print("square_sum", square_sum.get_shape())
        y_v = 0.5 * tf.reduce_sum(tf.subtract(sum_square, square_sum), 1)  # None * 1

    with tf.variable_scope("Deep-part"):
        if FLAGS.batch_norm:
            #normalizer_fn = tf.contrib.layers.batch_norm
            #normalizer_fn = tf.layers.batch_normalization
            if mode == tf.estimator.ModeKeys.TRAIN:
                train_phase = True
                #normalizer_params = {'decay': batch_norm_decay, 'center': True, 'scale': True, 'updates_collections': None, 'is_training': True, 'reuse': None}
            else:
                train_phase = False
                #normalizer_params = {'decay': batch_norm_decay, 'center': True, 'scale': True, 'updates_collections': None, 'is_training': False, 'reuse': True}
        else:
            normalizer_fn = None
            normalizer_params = None

        deep_inputs = tf.reshape(embeddings, shape=[-1, field_size * embedding_size])  # None * (F*K)
        for i in range(len(layers)):
            #if FLAGS.batch_norm:
            #    deep_inputs = batch_norm_layer(deep_inputs, train_phase=train_phase, scope_bn='bn_%d' % i)
            #    #normalizer_params.update({'scope': 'bn_%d' % i})
            deep_inputs = tf.contrib.layers.fully_connected(inputs=deep_inputs, num_outputs=layers[i],
                #normalizer_fn=normalizer_fn, normalizer_params=normalizer_params,
                weights_regularizer=tf.contrib.layers.l2_regularizer(l2_reg), scope='mlp%d' % i)
            if FLAGS.batch_norm:
                deep_inputs = batch_norm_layer(deep_inputs, train_phase=train_phase, scope_bn='bn_%d' % i)  # Placed after the ReLU; see https://github.com/ducha-aiki/caffenet-benchmark/blob/master/batchnorm.md#bn----before-or-after-relu
            if mode == tf.estimator.ModeKeys.TRAIN:
                deep_inputs = tf.nn.dropout(deep_inputs, keep_prob=dropout[i])  # Apply Dropout after all BN layers and set dropout=0.8 (drop_ratio=0.2)
                #deep_inputs = tf.layers.dropout(inputs=deep_inputs, rate=dropout[i], training=mode == tf.estimator.ModeKeys.TRAIN)

        y_deep = tf.contrib.layers.fully_connected(inputs=deep_inputs, num_outputs=1, activation_fn=tf.identity,
            weights_regularizer=tf.contrib.layers.l2_regularizer(l2_reg), scope='deep_out')
        y_d = tf.reshape(y_deep, shape=[-1])
        #sig_wgts = tf.get_variable(name='sigmoid_weights', shape=[layers[-1]], initializer=tf.glorot_normal_initializer())
        #sig_bias = tf.get_variable(name='sigmoid_bias', shape=[1], initializer=tf.constant_initializer(0.0))
        #deep_out = tf.nn.xw_plus_b(deep_inputs, sig_wgts, sig_bias, name='deep_out')

    with tf.variable_scope("DeepFM-out"):
        #y_bias = FM_B * tf.ones_like(labels, dtype=tf.float32)  # None * 1
        # Warning: labels cannot be used here, otherwise predict/export would fail while train/evaluate
        # work fine; presumably the Estimator optimizes the graph and does not feed labels when unused.
        y_bias = FM_B * tf.ones_like(y_d, dtype=tf.float32)  # None * 1
        y = y_bias + y_w + y_v + y_d
        pred = tf.sigmoid(y)

    predictions = {"prob": pred}
    export_outputs = {
        tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY:
            tf.estimator.export.PredictOutput(predictions)
    }
    # Provide an estimator spec for `ModeKeys.PREDICT`
    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions, export_outputs=export_outputs)

    #------build loss------
    loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=y, labels=labels)) + \
        l2_reg * tf.nn.l2_loss(FM_W) + \
        l2_reg * tf.nn.l2_loss(FM_V)  #+ l2_reg * tf.nn.l2_loss(sig_wgts)

    # Provide an estimator spec for `ModeKeys.EVAL`
    eval_metric_ops = {
        "auc": tf.metrics.auc(labels, pred)
    }
    if mode == tf.estimator.ModeKeys.EVAL:
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions, loss=loss, eval_metric_ops=eval_metric_ops)

    #------build optimizer------
    if FLAGS.optimizer == 'Adam':
        optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate, beta1=0.9, beta2=0.999, epsilon=1e-8)
    elif FLAGS.optimizer == 'Adagrad':
        optimizer = tf.train.AdagradOptimizer(learning_rate=learning_rate, initial_accumulator_value=1e-8)
    elif FLAGS.optimizer == 'Momentum':
        optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate, momentum=0.95)
    elif FLAGS.optimizer == 'ftrl':
        optimizer = tf.train.FtrlOptimizer(learning_rate)

    train_op = optimizer.minimize(loss, global_step=tf.train.get_global_step())

    # Provide an estimator spec for `ModeKeys.TRAIN` modes
    if mode == tf.estimator.ModeKeys.TRAIN:
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions, loss=loss, train_op=train_op)
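# A NumPy check (not from the original code) of the FM second-order trick used in the
# "Second-order" scope above: 0.5 * ((sum_f v_f x_f)^2 - sum_f (v_f x_f)^2), summed over the
# embedding dimension, equals the explicit sum of pairwise inner products.
import numpy as np

F, E = 5, 4
v = np.random.randn(F, E)                      # embeddings already scaled by their feature values
sum_square = np.square(v.sum(axis=0))          # (E,)
square_sum = np.square(v).sum(axis=0)          # (E,)
y_v = 0.5 * (sum_square - square_sum).sum()
pairwise = sum(np.dot(v[i], v[j]) for i in range(F) for j in range(i + 1, F))
assert np.isclose(y_v, pairwise)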
def model_fn(features, labels, mode, params):
    """Build Model function f(x) for Estimator."""
    #------hyperparameters----
    field_size = params["field_size"]
    feature_size = params["feature_size"]
    embedding_size = params["embedding_size"]
    l2_reg = params["l2_reg"]
    learning_rate = params["learning_rate"]
    #batch_norm_decay = params["batch_norm_decay"]
    #optimizer = params["optimizer"]
    layers = list(map(int, params["deep_layers"].split(',')))
    dropout = list(map(float, params["dropout"].split(',')))

    #------build weights------
    #FM_B = tf.get_variable(name='fm_bias', shape=[1], initializer=tf.constant_initializer(0.0))
    #FM_W = tf.get_variable(name='fm_w', shape=[feature_size], initializer=tf.glorot_normal_initializer())
    #FM_V = tf.get_variable(name='fm_v', shape=[feature_size, embedding_size], initializer=tf.glorot_normal_initializer())
    MVM_W = tf.get_variable(name='mvm_w', shape=[feature_size, embedding_size], initializer=tf.glorot_normal_initializer())
    MVM_B = tf.get_variable(name='mvm_b', shape=[field_size, embedding_size], initializer=tf.glorot_normal_initializer())

    #------build feature-------
    feat_ids = features['feat_ids']
    feat_ids = tf.reshape(feat_ids, shape=[-1, field_size])
    feat_vals = features['feat_vals']
    feat_vals = tf.reshape(feat_vals, shape=[-1, field_size])

    #------build f(x)------
    #with tf.variable_scope("First-order"):
    #    feat_wgts = tf.nn.embedding_lookup(FM_W, feat_ids)  # None * F * 1
    #    y_w = tf.reduce_sum(tf.multiply(feat_wgts, feat_vals), 1)
    #with tf.variable_scope("Second-order"):
    #    embeddings = tf.nn.embedding_lookup(FM_V, feat_ids)  # None * F * K
    #    feat_vals = tf.reshape(feat_vals, shape=[-1, field_size, 1])
    #    embeddings = tf.multiply(embeddings, feat_vals)  # vij*xi
    #    sum_square = tf.square(tf.reduce_sum(embeddings, 1))
    #    square_sum = tf.reduce_sum(tf.square(embeddings), 1)
    #    y_v = 0.5 * tf.reduce_sum(tf.subtract(sum_square, square_sum), 1)  # None * 1

    with tf.variable_scope("Embedding-layer"):
        embeddings = tf.nn.embedding_lookup(MVM_W, feat_ids)  # None * F * K
        feat_vals = tf.reshape(feat_vals, shape=[-1, field_size, 1])
        embeddings = tf.multiply(embeddings, feat_vals)  # None * F * K

    with tf.variable_scope("MVM-part"):
        all_order = tf.add(embeddings, MVM_B)
        x_mvm = all_order[:, 0, :]  # None * 1 * K
        for i in range(1, field_size):
            x_mvm = tf.multiply(x_mvm, all_order[:, i, :])
        x_mvm = tf.reshape(x_mvm, shape=[-1, embedding_size])  # None * K

    with tf.variable_scope("Deep-part"):
        if FLAGS.batch_norm:
            #normalizer_fn = tf.contrib.layers.batch_norm
            #normalizer_fn = tf.layers.batch_normalization
            if mode == tf.estimator.ModeKeys.TRAIN:
                train_phase = True
                #normalizer_params = {'decay': batch_norm_decay, 'center': True, 'scale': True, 'updates_collections': None, 'is_training': True, 'reuse': None}
            else:
                train_phase = False
                #normalizer_params = {'decay': batch_norm_decay, 'center': True, 'scale': True, 'updates_collections': None, 'is_training': False, 'reuse': True}
        else:
            normalizer_fn = None
            normalizer_params = None

        x_deep = tf.reshape(embeddings, shape=[-1, field_size * embedding_size])  # None * (F*K)
        for i in range(len(layers)):
            #if FLAGS.batch_norm:
            #    x_deep = batch_norm_layer(x_deep, train_phase=train_phase, scope_bn='bn_%d' % i)
            #    #normalizer_params.update({'scope': 'bn_%d' % i})
            x_deep = tf.contrib.layers.fully_connected(inputs=x_deep, num_outputs=layers[i],
                #normalizer_fn=normalizer_fn, normalizer_params=normalizer_params,
                weights_regularizer=tf.contrib.layers.l2_regularizer(l2_reg), scope='mlp%d' % i)
            if FLAGS.batch_norm:
                x_deep = batch_norm_layer(x_deep, train_phase=train_phase, scope_bn='bn_%d' % i)  # Placed after the ReLU; see https://github.com/ducha-aiki/caffenet-benchmark/blob/master/batchnorm.md#bn----before-or-after-relu
            if mode == tf.estimator.ModeKeys.TRAIN:
                x_deep = tf.nn.dropout(x_deep, keep_prob=dropout[i])  # Apply Dropout after all BN layers and set dropout=0.8 (drop_ratio=0.2)
                #x_deep = tf.layers.dropout(inputs=x_deep, rate=dropout[i], training=mode == tf.estimator.ModeKeys.TRAIN)

    with tf.variable_scope("DeepMVM-out"):
        x_stack = tf.concat([x_mvm, x_deep], axis=1)  # None * (F*K + deep_layers[i])
        y_deep = tf.contrib.layers.fully_connected(inputs=x_stack, num_outputs=1, activation_fn=tf.identity,
            weights_regularizer=tf.contrib.layers.l2_regularizer(l2_reg), scope='deep_out')
        y = tf.reshape(y_deep, shape=[-1])
        pred = tf.sigmoid(y)

    predictions = {"prob": pred}
    export_outputs = {
        tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY:
            tf.estimator.export.PredictOutput(predictions)
    }
    # Provide an estimator spec for `ModeKeys.PREDICT`
    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions, export_outputs=export_outputs)

    #------build loss------
    loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=y, labels=labels)) + \
        l2_reg * tf.nn.l2_loss(MVM_W) + \
        l2_reg * tf.nn.l2_loss(MVM_B)

    # Provide an estimator spec for `ModeKeys.EVAL`
    eval_metric_ops = {"auc": tf.metrics.auc(labels, pred)}
    if mode == tf.estimator.ModeKeys.EVAL:
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions, loss=loss, eval_metric_ops=eval_metric_ops)

    #------build optimizer------
    if FLAGS.optimizer == 'Adam':
        optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate, beta1=0.9, beta2=0.999, epsilon=1e-8)
    elif FLAGS.optimizer == 'Adagrad':
        optimizer = tf.train.AdagradOptimizer(learning_rate=learning_rate, initial_accumulator_value=1e-8)
    elif FLAGS.optimizer == 'Momentum':
        optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate, momentum=0.95)
    elif FLAGS.optimizer == 'ftrl':
        optimizer = tf.train.FtrlOptimizer(learning_rate)

    train_op = optimizer.minimize(loss, global_step=tf.train.get_global_step())

    # Provide an estimator spec for `ModeKeys.TRAIN` modes
    if mode == tf.estimator.ModeKeys.TRAIN:
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions, loss=loss, train_op=train_op)
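# A minimal NumPy sketch (illustrative shapes only) of the MVM part above: after adding the
# per-field bias, the field vectors are combined by an element-wise running product, so the
# result is a single K-dim vector that mixes all fields multiplicatively.
import numpy as np

F, K = 4, 3
emb = np.random.randn(F, K)
bias = np.random.randn(F, K)
all_order = emb + bias
x_mvm = all_order[0]
for i in range(1, F):
    x_mvm = x_mvm * all_order[i]
assert np.allclose(x_mvm, np.prod(all_order, axis=0)) and x_mvm.shape == (K,)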
def build_input(features, params): cat_columns = params['cat_columns'] val_columns = params['val_columns'] column_to_field = params['column_to_field'] #dnn_columns = params['dnn_columns'] dimension_config = params['dimension_config'] reg = params['reg'] embed_dim = params['embed_dim'] embedding_table = EmbeddingTable() embedding_dict = OrderedDict() with tf.variable_scope("fm", reuse=tf.AUTO_REUSE, values=[features]) as scope: with tf.device('/cpu:0'): for name, col in cat_columns.items(): field = column_to_field.get(name, name) cur_dimension = dimension_config[ field] if field in dimension_config else embed_dim embedding_table.add_linear_weights(vocab_name=name, vocab_size=col._num_buckets) embedding_table.add_embed_weights(vocab_name=field, vocab_size=col._num_buckets, embed_dim=cur_dimension, reg=reg) for name, col in val_columns.items(): field = column_to_field.get(name, name) cur_dimension = dimension_config[ field] if field in dimension_config else embed_dim embedding_table.add_linear_weights(vocab_name=name, vocab_size=1) embedding_table.add_embed_weights(vocab_name=field, vocab_size=1, embed_dim=cur_dimension, reg=reg) builder = _LazyBuilder(features) # linear part linear_outputs = [] for name, col in cat_columns.items(): # get sparse tensor of input feature from feature column sp_tensor = col._get_sparse_tensors(builder) sp_ids = sp_tensor.id_tensor linear_weights = embedding_table.get_linear_weights(name) # linear_weights: (vocab_size, 1) # sp_ids: (batch_size, max_tokens_per_example) # sp_values: (batch_size, max_tokens_per_example) linear_output = embedding_ops.safe_embedding_lookup_sparse( linear_weights, sp_ids, None, combiner='sum', name='{}_linear_output'.format(name)) linear_outputs.append(linear_output) for name, col in val_columns.items(): dense_tensor = col._get_dense_tensor(builder) linear_weights = embedding_table.get_linear_weights(name) linear_output = tf.multiply(dense_tensor, linear_weights) linear_outputs.append(linear_output) # linear_outputs: (batch_szie, nonzero_feature_num) linear_outputs = tf.concat(linear_outputs, axis=1) # poly part for name, col, in cat_columns.items(): # get sparse tensor of input feature from feature column field = column_to_field.get(name, name) sp_tensor = col._get_sparse_tensors(builder) sp_ids = sp_tensor.id_tensor embed_weights = embedding_table.get_embed_weights(field) # embeddings: (batch_size, embed_dim) # x_i * v_i embeddings = embedding_ops.safe_embedding_lookup_sparse( embed_weights, sp_ids, None, combiner='sum', name='{}_{}_embedding'.format(field, name)) embedding_dict[field] = embeddings for name, col in val_columns.items(): field = column_to_field.get(name, name) dense_tensor = col._get_dense_tensor(builder) embed_weights = embedding_table.get_embed_weights(field) embeddings = tf.multiply(dense_tensor, embed_weights) embedding_dict[field] = embeddings with tf.variable_scope("dnn_embed"): x = tf.concat(list(embedding_dict.values()), axis=1) N = len(embedding_dict) T = sum([ embedding.get_shape().as_list()[1] for embedding in embedding_dict.values() ]) print("wkfm N:", N, " T:", T) indices = [] for i, embeddings in enumerate(embedding_dict.values()): dim = embeddings.get_shape().as_list()[1] indices.extend([i] * dim) indices.extend([len(embedding_dict)] * shape) outputs = [] for field, embeddings in embedding_dict.items(): di = dimension_config[ field] if field in dimension_config else embed_dim U = tf.get_variable('{}_wkfm'.format(field), [T, di], initializer=tf.glorot_normal_initializer(), trainable=True) wkfm_weights = 
tf.get_variable('{}_wkfm_weights'.format(field), [N], initializer=tf.ones_initializer, trainable=True) weights = tf.gather(wkfm_weights, indices) y = tf.matmul(weights * x, U) outputs.append(y) y = tf.concat(outputs, axis=1) y = x * y new_inputs = tf.concat([linear_outputs, y], 1) shared_weights = tf.get_variable( name="fm_share", dtype=tf.float32, shape=[new_inputs.get_shape().as_list()[1], 256], initializer=tf.glorot_normal_initializer(), regularizer=tf.contrib.layers.l2_regularizer(reg), trainable=True) new_inputs = tf.matmul(new_inputs, shared_weights) return new_inputs
def model_fn(features, labels, mode, params):
    """Build Model function f(x) for Estimator."""
    #------hyperparameters----
    field_size = params["field_size"]
    feature_size = params["feature_size"]
    embedding_size = params["embedding_size"]
    l2_reg = params["l2_reg"]
    learning_rate = params["learning_rate"]
    #optimizer = params["optimizer"]
    layers = list(map(int, params["deep_layers"].split(',')))
    dropout = list(map(float, params["dropout"].split(',')))
    num_pairs = field_size * (field_size - 1) // 2

    #------build weights------
    Global_Bias = tf.get_variable(name='bias', shape=[1], initializer=tf.constant_initializer(0.0))
    Feat_Bias = tf.get_variable(name='linear', shape=[feature_size], initializer=tf.glorot_normal_initializer())
    Feat_Emb = tf.get_variable(name='emb', shape=[feature_size, embedding_size], initializer=tf.glorot_normal_initializer())
    #Prod_Kernel = tf.get_variable(name='kernel', shape=[embedding_size, num_pairs, embedding_size], initializer=tf.glorot_normal_initializer())

    #------build feature-------
    feat_ids = features['feat_ids']  # None * F * 1
    feat_ids = tf.reshape(feat_ids, shape=[-1, field_size])
    feat_vals = features['feat_vals']  # None * F * 1
    feat_vals = tf.reshape(feat_vals, shape=[-1, field_size])

    #------build f(x)------
    with tf.variable_scope("Linear-part"):
        feat_wgts = tf.nn.embedding_lookup(Feat_Bias, feat_ids)  # None * F * 1
        y_linear = tf.reduce_sum(tf.multiply(feat_wgts, feat_vals), 1)

    with tf.variable_scope("Embedding-layer"):
        embeddings = tf.nn.embedding_lookup(Feat_Emb, feat_ids)  # None * F * K
        feat_vals = tf.reshape(feat_vals, shape=[-1, field_size, 1])
        embeddings = tf.multiply(embeddings, feat_vals)  # None * F * K

    with tf.variable_scope("Product-layer"):
        if FLAGS.model_type == 'FNN':
            deep_inputs = tf.reshape(embeddings, shape=[-1, field_size * embedding_size])
        elif FLAGS.model_type == 'Inner':
            row = []
            col = []
            for i in range(field_size - 1):
                for j in range(i + 1, field_size):
                    row.append(i)
                    col.append(j)
            p = tf.gather(embeddings, row, axis=1)
            q = tf.gather(embeddings, col, axis=1)
            #p = tf.reshape(p, [-1, num_pairs, embedding_size])
            #q = tf.reshape(q, [-1, num_pairs, embedding_size])
            inner = tf.reshape(tf.reduce_sum(p * q, [-1]), [-1, num_pairs])  # None * (F*(F-1)/2)
            deep_inputs = tf.concat([tf.reshape(embeddings, shape=[-1, field_size * embedding_size]), inner], 1)  # None * ( F*K+F*(F-1)/2 )
        elif FLAGS.model_type == 'Outer':  # ERROR: NOT ready yet
            row = []
            col = []
            for i in range(field_size - 1):
                for j in range(i + 1, field_size):
                    row.append(i)
                    col.append(j)
            p = tf.gather(embeddings, row, axis=1)
            q = tf.gather(embeddings, col, axis=1)
            #p = tf.reshape(p, [-1, num_pairs, embedding_size])
            #q = tf.reshape(q, [-1, num_pairs, embedding_size])
            #einsum('i,j->ij', p, q)  # output[i,j] = p[i]*q[j], outer product
            outer = tf.reshape(tf.einsum('api,apj->apij', p, q), [-1, num_pairs * embedding_size * embedding_size])  # None * (F*(F-1)/2*K*K)
            deep_inputs = tf.concat([tf.reshape(embeddings, shape=[-1, field_size * embedding_size]), outer], 1)  # None * ( F*K+F*(F-1)/2*K*K )

    with tf.variable_scope("Deep-part"):
        if mode == tf.estimator.ModeKeys.TRAIN:
            train_phase = True
        else:
            train_phase = False

        for i in range(len(layers)):
            deep_inputs = tf.contrib.layers.fully_connected(inputs=deep_inputs, num_outputs=layers[i],
                weights_regularizer=tf.contrib.layers.l2_regularizer(l2_reg), scope='mlp%d' % i)
            if FLAGS.batch_norm:
                deep_inputs = batch_norm_layer(deep_inputs, train_phase=train_phase, scope_bn='bn_%d' % i)  # Placed after the ReLU; see https://github.com/ducha-aiki/caffenet-benchmark/blob/master/batchnorm.md#bn----before-or-after-relu
            if mode == tf.estimator.ModeKeys.TRAIN:
                deep_inputs = tf.nn.dropout(deep_inputs, keep_prob=dropout[i])  # Apply Dropout after all BN layers and set dropout=0.8 (drop_ratio=0.2)
                #deep_inputs = tf.layers.dropout(inputs=deep_inputs, rate=dropout[i], training=mode == tf.estimator.ModeKeys.TRAIN)

        y_deep = tf.contrib.layers.fully_connected(inputs=deep_inputs, num_outputs=1, activation_fn=tf.identity,
            weights_regularizer=tf.contrib.layers.l2_regularizer(l2_reg), scope='deep_out')
        y_d = tf.reshape(y_deep, shape=[-1])

    with tf.variable_scope("PNN-out"):
        #y_bias = Global_Bias * tf.ones_like(labels, dtype=tf.float32)  # None * 1
        # Warning: labels cannot be used here, otherwise predict/export would fail while train/evaluate
        # work fine; presumably the Estimator optimizes the graph and does not feed labels when unused.
        y_bias = Global_Bias * tf.ones_like(y_d, dtype=tf.float32)  # None * 1
        y = y_bias + y_linear + y_d
        pred = tf.sigmoid(y)

    predictions = {"prob": pred}
    export_outputs = {
        tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY:
            tf.estimator.export.PredictOutput(predictions)
    }
    # Provide an estimator spec for `ModeKeys.PREDICT`
    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions, export_outputs=export_outputs)

    #------build loss------
    loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=y, labels=labels)) + \
        l2_reg * tf.nn.l2_loss(Feat_Bias) + l2_reg * tf.nn.l2_loss(Feat_Emb)

    # Provide an estimator spec for `ModeKeys.EVAL`
    eval_metric_ops = {
        "auc": tf.metrics.auc(labels, pred)
    }
    if mode == tf.estimator.ModeKeys.EVAL:
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions, loss=loss, eval_metric_ops=eval_metric_ops)

    #------build optimizer------
    if FLAGS.optimizer == 'Adam':
        optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate, beta1=0.9, beta2=0.999, epsilon=1e-8)
    elif FLAGS.optimizer == 'Adagrad':
        optimizer = tf.train.AdagradOptimizer(learning_rate=learning_rate, initial_accumulator_value=1e-8)
    elif FLAGS.optimizer == 'Momentum':
        optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate, momentum=0.95)
    elif FLAGS.optimizer == 'ftrl':
        optimizer = tf.train.FtrlOptimizer(learning_rate)

    train_op = optimizer.minimize(loss, global_step=tf.train.get_global_step())

    # Provide an estimator spec for `ModeKeys.TRAIN` modes
    if mode == tf.estimator.ModeKeys.TRAIN:
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions, loss=loss, train_op=train_op)
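# A NumPy sketch (assumed shapes, not from the original project) of the 'Inner' product layer
# above: gather the row/col field indices of every pair, take the element-wise product, and
# reduce over the embedding axis to get one scalar per field pair.
import numpy as np

F, K = 4, 3
emb = np.random.randn(F, K)
row = [i for i in range(F - 1) for _ in range(i + 1, F)]
col = [j for i in range(F - 1) for j in range(i + 1, F)]
inner = (emb[row] * emb[col]).sum(axis=-1)          # (F*(F-1)/2,)
expected = [np.dot(emb[i], emb[j]) for i in range(F) for j in range(i + 1, F)]
assert np.allclose(inner, expected)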
def test_matmul(): ''' Run tests on the Wave custom matmul operator. ''' tf.reset_default_graph() a = tf.get_variable("a", [2, 3], dtype=tf.float32, initializer=tf.glorot_normal_initializer()) b = tf.get_variable("b", [3, 4], dtype=tf.float32, initializer=tf.glorot_normal_initializer()) t_init = tf.global_variables_initializer() debug = False iters = 100 widgets = ["matmul test: ", pb.Percentage(), ' ', pb.Bar(), ' ', pb.ETA()] pbar = pb.ProgressBar(widgets=widgets, maxval=iters) pbar.start() for i in range(100): pbar.update(i) # NN variant with tf.Session(''): t_init.run() if debug: print( "Wave Kernel (NN):\n-------------------------------------------------" ) if debug: print("a: %s" % (a.eval())) if debug: print("b: %s" % (b.eval())) # (2, 3) * (3, 4) = (2, 4) z = waveflow.wavecomp_ops_module.wave_mat_mul(a, b).eval() if debug: print("z: %s" % (z)) # Convert to numpy a_np = np.array(a.eval()) b_np = np.array(b.eval()) z2 = np.matmul(a_np, b_np) if debug: print( "\nNumpy:\n-------------------------------------------------" ) if debug: print("a (np): %s" % (a_np)) if debug: print("b (np): %s" % (b_np)) if debug: print("z (np): %s" % (z2)) if debug: print("\n\n") assert np.allclose(z, z2, atol=0.1) # TN variant with tf.Session(''): t_init.run() if debug: print( "Wave Kernel (TN):\n-------------------------------------------------" ) a_t = tf.transpose(a) if debug: print("a: %s" % (a_t.eval())) if debug: print("b: %s" % (b.eval())) # (3, 2).T * (3, 4) = (2, 4) z = waveflow.wavecomp_ops_module.wave_mat_mul( a_t, b, transpose_a=True).eval() if debug: print("z: %s" % (z)) # Convert to numpy a_np = np.array(a_t.eval()) b_np = np.array(b.eval()) assert np.allclose(a.eval(), a_np.T) z2 = np.matmul(a_np.T, b_np) if debug: print( "\nNumpy:\n-------------------------------------------------" ) if debug: print("a (np): %s" % (a_np)) if debug: print("b (np): %s" % (b_np)) if debug: print("z (np): %s" % (z2)) if debug: print("\n\n") assert np.allclose(z, z2, atol=0.1) # NT variant with tf.Session(''): t_init.run() if debug: print( "Wave Kernel (NT):\n-------------------------------------------------" ) b_t = tf.transpose(b) if debug: print("a: %s" % (a.eval())) if debug: print("b: %s" % (b_t.eval())) z = waveflow.wavecomp_ops_module.wave_mat_mul( a, b_t, transpose_b=True).eval() if debug: print("z: %s" % (z)) # Convert to numpy a_np = np.array(a.eval()) b_np = np.array(b_t.eval()) z2 = np.matmul(a_np, b_np.T) if debug: print( "\nNumpy:\n-------------------------------------------------" ) if debug: print("a (np): %s" % (a_np)) if debug: print("b (np): %s" % (b_np)) if debug: print("z (np): %s" % (z2)) if debug: print("\n\n") assert np.allclose(z, z2, atol=0.1) # TT variant with tf.Session(''): t_init.run() if debug: print( "Wave Kernel (TT):\n-------------------------------------------------" ) a_t = tf.transpose(a) b_t = tf.transpose(b) if debug: print("a: %s" % (a_t.eval())) if debug: print("b: %s" % (b_t.eval())) # (3, 2).T * (4, 3).T = (2, 4) z = waveflow.wavecomp_ops_module.wave_mat_mul( a_t, b_t, transpose_a=True, transpose_b=True).eval() if debug: print("z: %s" % (z)) # Convert to numpy a_np = np.array(a_t.eval()) b_np = np.array(b_t.eval()) z2 = np.matmul(a_np.T, b_np.T) if debug: print( "\nNumpy:\n-------------------------------------------------" ) if debug: print("a (np): %s" % (a_np)) if debug: print("b (np): %s" % (b_np)) if debug: print("z (np): %s" % (z2)) if debug: print("\n\n") assert np.allclose(z, z2, atol=0.1) pbar.finish() return True
images = tf.placeholder(tf.float32, [None, 28 * 28]) image_labels = tf.placeholder(tf.float32, [None, 10]) def my_leaky_relu(x): return tf.nn.leaky_relu(x, alpha=.5) for i in range(num_layers): if i == 0: layers.append( tf.layers.dense(images, 128, activation=my_leaky_relu, kernel_initializer=tf.glorot_normal_initializer( seed=None, dtype=tf.float32), name=("Layer" + str(i)))) elif i == 63: layers.append( tf.layers.dense(layers[i - 1], 128, activation=my_leaky_relu, kernel_initializer=tf.glorot_normal_initializer( seed=None, dtype=tf.float32), name=("Layer_1_" + str(i)))) layers[i] = layers[i] + tf.layers.dense( images, 128, activation=None, use_bias=False,
def __init__(self, sequence_length, num_classes, vocab_size, tags_vocab_size,
             deps_vocab_size, embedding_size, filter_sizes, num_filters, l2_reg_lambda=0.0):
    # Placeholders for input, output and dropout
    self.input_x = tf.placeholder(tf.int32, [None, sequence_length], name="input_x")
    self.input_tags = tf.placeholder(tf.int32, [None, sequence_length], name="input_tags")
    self.input_deps = tf.placeholder(tf.int32, [None, sequence_length], name="input_dependency")
    self.input_head = tf.placeholder(tf.int32, [None, sequence_length], name="input_head")
    self.input_y = tf.placeholder(tf.float32, [None, num_classes], name="input_y")
    self.dropout_keep_prob = tf.placeholder(tf.float32, name="dropout_keep_prob")
    self.is_training = tf.placeholder(tf.bool, name="is_training")
    self.temperature = tf.placeholder(tf.float32, name="Temperature")

    # Keeping track of l2 regularization loss (optional)
    l2_loss = tf.constant(0.0)

    #initializer = tf.contrib.layers.variance_scaling_initializer()
    initializer = tf.glorot_normal_initializer()

    # Embedding layer
    with tf.device('/cpu:0'), tf.name_scope("embedding_words"):
        self.W = tf.get_variable("embed_W_words", [vocab_size, embedding_size], initializer=initializer)
        self.embedded_chars = tf.nn.embedding_lookup(self.W, self.input_x)
        self.embedded_chars_expanded = tf.expand_dims(self.embedded_chars, -1)

    with tf.device('/cpu:0'), tf.name_scope("embedding_tags"):
        W_tags = tf.get_variable("embed_W_tags", [tags_vocab_size, embedding_size], initializer=initializer)
        embedded_tags = tf.nn.embedding_lookup(W_tags, self.input_tags)
        embedded_tags_expanded = tf.expand_dims(embedded_tags, -1)

    with tf.device('/cpu:0'), tf.name_scope("embedding_deps"):
        W_deps = tf.get_variable("embed_W_deps", [deps_vocab_size, embedding_size], initializer=initializer)
        embedded_deps = tf.nn.embedding_lookup(W_deps, self.input_deps)
        embedded_deps_expanded = tf.expand_dims(embedded_deps, -1)

    with tf.device('/cpu:0'), tf.name_scope("embedding_head"):
        W_head = tf.get_variable("embed_W_head", [vocab_size, embedding_size], initializer=initializer)
        embedded_head = tf.nn.embedding_lookup(W_head, self.input_head)
        embedded_head_expanded = tf.expand_dims(embedded_head, -1)

    cnn_inputs = tf.concat([self.embedded_chars_expanded, embedded_tags_expanded,
                            embedded_deps_expanded, embedded_head_expanded], -1)
    print("Embedded Shape:", cnn_inputs.shape)

    # Create a convolution + maxpool layer for each filter size
    pooled_outputs = []
    for i, filter_size in enumerate(filter_sizes):
        with tf.name_scope("conv-maxpool-%s" % filter_size):
            # Convolution Layer
            filter_shape = [filter_size, embedding_size, 4, num_filters]
            W = tf.Variable(tf.truncated_normal(filter_shape, stddev=0.1), name="W")
            #W = tf.get_variable("conv_{}_W".format(filter_size), shape=filter_shape, initializer=initializer)
            b = tf.Variable(tf.constant(0.1, shape=[num_filters]), name="b")
            conv = tf.nn.conv2d(cnn_inputs, W, strides=[1, 1, 1, 1], padding="VALID", name="conv")
            # Apply BN
            conv = tf.layers.batch_normalization(conv, axis=-1, training=self.is_training)  # axis selects the channel dimension.
            # Apply nonlinearity
            h = tf.nn.relu(tf.nn.bias_add(conv, b), name="relu")
            # Maxpooling over the outputs
            pooled = tf.nn.max_pool(h, ksize=[1, sequence_length - filter_size + 1, 1, 1],
                                    strides=[1, 1, 1, 1], padding='VALID', name="pool")
            pooled_outputs.append(pooled)

    # Combine all the pooled features
    num_filters_total = num_filters * len(filter_sizes)
    self.h_pool = tf.concat(pooled_outputs, 3)
    self.h_pool_flat = tf.reshape(self.h_pool, [-1, num_filters_total])

    # Add dropout
    with tf.name_scope("dropout"):
        self.h_drop = tf.nn.dropout(self.h_pool_flat, self.dropout_keep_prob)

    # Final (unnormalized) scores and predictions
    with tf.name_scope("output"):
        W = tf.get_variable("W", shape=[num_filters_total, num_classes],
                            initializer=tf.contrib.layers.xavier_initializer())
        b = tf.Variable(tf.constant(0.1, shape=[num_classes]), name="b")
        l2_loss += tf.nn.l2_loss(W)
        l2_loss += tf.nn.l2_loss(b)
        self.scores = tf.nn.xw_plus_b(self.h_drop, W, b, name="scores")
        self.predictions = tf.argmax(self.scores, 1, name="predictions")
        self.probabilities = tf.nn.softmax(self.scores / self.temperature)

    # Calculate mean cross-entropy loss
    with tf.name_scope("loss"):
        losses = tf.nn.softmax_cross_entropy_with_logits(logits=self.scores, labels=self.input_y)
        self.loss = tf.reduce_mean(losses) + l2_reg_lambda * l2_loss

    # Accuracy
    with tf.name_scope("accuracy"):
        correct_predictions = tf.equal(self.predictions, tf.argmax(self.input_y, 1))
        self.accuracy = tf.reduce_mean(tf.cast(correct_predictions, "float"), name="accuracy")
def dnn_layer(features):
    # para set
    feature_size = FLAGS.feature_size
    common_field_size = FLAGS.common_field_size
    embedding_size = FLAGS.embedding_size
    common_dims = common_field_size * embedding_size
    layers = list(map(int, (FLAGS.deep_layers).split(',')))
    dropout = list(map(float, (FLAGS.dropout).split(',')))
    l2_reg = FLAGS.l2_reg

    # {U-A-X-C features that need no special handling}
    feat_ids = features['feat_ids']
    feat_vals = features['feat_vals']
    # {multi-hot}
    video_ids = features['videoIdsids']

    # ------build weights------
    with tf.variable_scope("Embedding", reuse=tf.AUTO_REUSE):
        Feat_Emb = tf.get_variable(name='embeddings', shape=[feature_size, embedding_size],
                                   initializer=tf.glorot_normal_initializer())

    #------build f(x)------
    with tf.variable_scope("Embedding-layer", reuse=tf.AUTO_REUSE):
        common_embs = tf.nn.embedding_lookup(Feat_Emb, feat_ids)  # None * F' * K
        feat_vals = tf.reshape(feat_vals, shape=[-1, common_field_size, 1])  # reshape for_warn
        uac_emb = tf.multiply(common_embs, feat_vals)
        video_emb = tf.nn.embedding_lookup_sparse(Feat_Emb, sp_ids=video_ids, sp_weights=None, combiner="sum")

    with tf.variable_scope("DNN-layer", reuse=tf.AUTO_REUSE):
        if FLAGS.batch_norm:
            if FLAGS.task_type == 'train':
                train_phase = True
            else:
                train_phase = False
        else:
            normalizer_fn = None
            normalizer_params = None

        x_deep = tf.concat([tf.reshape(uac_emb, shape=[-1, common_dims]), video_emb], axis=1)  # None * (F*K)
        for i in range(len(layers)):
            x_deep = tf.contrib.layers.fully_connected(
                inputs=x_deep, num_outputs=layers[i],
                weights_regularizer=tf.contrib.layers.l2_regularizer(l2_reg), scope='dnn%d' % i)
            if FLAGS.batch_norm:
                x_deep = batch_norm_layer(x_deep, train_phase=train_phase, scope_bn='bn_%d' % i)
            if FLAGS.task_type == 'train':
                x_deep = tf.nn.dropout(x_deep, keep_prob=dropout[i])  # Apply Dropout after all BN layers and set dropout=0.8 (drop_ratio=0.2)

    return x_deep
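# A tiny NumPy sketch (illustrative only) of what the "sum" combiner of
# tf.nn.embedding_lookup_sparse does for the multi-hot video_ids above: the embeddings of all
# ids present in one example are summed into a single K-dim vector.
import numpy as np

table = np.random.randn(10, 4)        # (feature_size, embedding_size)
ids_for_one_example = [2, 5, 7]       # multi-hot ids in one row of the SparseTensor
summed = table[ids_for_one_example].sum(axis=0)
assert summed.shape == (4,)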
def generator(inputs, is_train=True, reuse=False): image_size = 64 s16 = image_size // 16 gf_dim = 64 # Dimension of gen filters in first conv layer. [64] c_dim = FLAGS.c_dim # n_color 3 w_init = tf.glorot_normal_initializer() gamma_init = tf.random_normal_initializer(1., 0.02) with tf.variable_scope("generator", reuse=reuse): net_in = InputLayer(inputs, name='g/in') net_h0 = DenseLayer(net_in, n_units=(gf_dim * 8 * s16 * s16), W_init=w_init, act=tf.identity, name='g/h0/lin') net_h0 = ReshapeLayer(net_h0, shape=[-1, s16, s16, gf_dim * 8], name='g/h0/reshape') net_h0 = BatchNormLayer(net_h0, decay=0.9, act=tf.nn.relu, is_train=is_train, gamma_init=gamma_init, name='g/h0/batch_norm') net_h1 = DeConv2d(net_h0, gf_dim * 4, (5, 5), strides=(2, 2), padding='SAME', act=None, W_init=w_init, name='g/h1/decon2d') net_h1 = BatchNormLayer(net_h1, decay=0.9, act=tf.nn.relu, is_train=is_train, gamma_init=gamma_init, name='g/h1/batch_norm') net_h2 = DeConv2d(net_h1, gf_dim * 2, (5, 5), strides=(2, 2), padding='SAME', act=None, W_init=w_init, name='g/h2/decon2d') net_h2 = BatchNormLayer(net_h2, decay=0.9, act=tf.nn.relu, is_train=is_train, gamma_init=gamma_init, name='g/h2/batch_norm') net_h3 = DeConv2d(net_h2, gf_dim, (5, 5), strides=(2, 2), padding='SAME', act=None, W_init=w_init, name='g/h3/decon2d') net_h3 = BatchNormLayer(net_h3, decay=0.9, act=tf.nn.relu, is_train=is_train, gamma_init=gamma_init, name='g/h3/batch_norm') net_h4 = DeConv2d(net_h3, c_dim, (5, 5), strides=(2, 2), padding='SAME', act=None, W_init=w_init, name='g/h4/decon2d') net_h4.outputs = tf.nn.tanh(net_h4.outputs) return net_h4
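# A quick arithmetic sketch of the generator's spatial sizes above: starting from
# s16 = 64 // 16 = 4, each of the four stride-2 transposed convolutions doubles the resolution,
# ending at the 64x64 output.
image_size = 64
s16 = image_size // 16
sizes = [s16 * (2 ** k) for k in range(5)]   # [4, 8, 16, 32, 64]
assert sizes[-1] == image_size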
def model_fn(features, labels, mode, params):
    """Build Model function f(x) for Estimator."""
    #------hyper parameters------
    field_size = params['field_size']
    feature_size = params['feature_size']
    embedding_size = params['embedding_size']
    l2_reg = params['l2_reg']
    learning_rate = params['learning_rate']
    dropout = params['dropout']
    attention_factor = params['attention_factor']

    #------build weights------
    Global_Bias = tf.get_variable("bias", shape=[1], initializer=tf.constant_initializer(0.0))
    Feat_Wgts = tf.get_variable("linear", shape=[feature_size], initializer=tf.glorot_normal_initializer())
    Feat_Emb = tf.get_variable("emb", shape=[feature_size, embedding_size], initializer=tf.glorot_normal_initializer())

    #------build feature------
    feat_ids = features['feat_ids']
    feat_vals = features['feat_vals']
    feat_ids = tf.reshape(feat_ids, shape=[-1, field_size])
    feat_vals = tf.reshape(feat_vals, shape=[-1, field_size])  # None * F

    #------build f(x)------
    # FM part: sum(wx)
    with tf.variable_scope("Linear-part"):
        feat_wgts = tf.nn.embedding_lookup(Feat_Wgts, feat_ids)  # None * F * 1
        y_linear = tf.reduce_sum(tf.multiply(feat_wgts, feat_vals), 1)

    # Deep part
    with tf.variable_scope("Embedding_Layer"):
        embeddings = tf.nn.embedding_lookup(Feat_Emb, feat_ids)  # None * F * K
        feat_vals = tf.reshape(feat_vals, shape=[-1, field_size, 1])  # None * F * 1
        embeddings = tf.multiply(embeddings, feat_vals)  # None * F * K

    with tf.variable_scope("Pair-wise_Interaction_Layer"):
        num_interactions = field_size * (field_size - 1) / 2
        element_wise_product_list = []
        for i in range(0, field_size):
            for j in range(i + 1, field_size):
                element_wise_product_list.append(tf.multiply(embeddings[:, i, :], embeddings[:, j, :]))
        element_wise_product_list = tf.stack(element_wise_product_list)  # (F*(F-1)/2) * None * K; tf.stack concatenates the matrices
        element_wise_product_list = tf.transpose(element_wise_product_list, perm=[1, 0, 2])  # None * (F(F-1)/2) * K

    # Compute the attention scores
    with tf.variable_scope("Attention_Network"):
        deep_inputs = tf.reshape(element_wise_product_list, shape=[-1, embedding_size])  # (None*F(F-1)/2) * K
        deep_inputs = contrib.layers.fully_connected(inputs=deep_inputs, num_outputs=attention_factor, activation_fn=tf.nn.relu,
                                                     weights_regularizer=contrib.layers.l2_regularizer(l2_reg), scope="attention_net_mlp")
        aij = contrib.layers.fully_connected(inputs=deep_inputs, num_outputs=1, activation_fn=tf.identity,
                                             weights_regularizer=contrib.layers.l2_regularizer(l2_reg), scope="attention_net_out")  # (None*F(F-1)/2) * 1

        # Normalize the attention scores with a softmax
        aij = tf.reshape(aij, shape=[-1, int(num_interactions), 1])
        aij_softmax = tf.nn.softmax(aij, dim=1, name="attention_net_softout")  # None * num_interactions

        # TODO: why apply dropout to the attention scores? This part is not entirely clear.
        if mode == tf.estimator.ModeKeys.TRAIN:
            aij_softmax = tf.nn.dropout(aij_softmax, keep_prob=dropout[0])

    with tf.variable_scope("Attention-based_Pooling_Layer"):
        deep_inputs = tf.multiply(element_wise_product_list, aij_softmax)  # None * (F(F-1)/2) * K
        deep_inputs = tf.reduce_sum(deep_inputs, axis=1)  # None * K, pooling operation

        # The output of the attention-based pooling layer also goes through dropout
        if mode == tf.estimator.ModeKeys.TRAIN:
            deep_inputs = tf.nn.dropout(deep_inputs, keep_prob=dropout[1])

    # The output of this layer is a K-dimensional vector
    with tf.variable_scope("Prediction_Layer"):
        # Followed directly by the output unit
        deep_inputs = contrib.layers.fully_connected(inputs=deep_inputs, num_outputs=1, activation_fn=tf.identity,
                                                     weights_regularizer=contrib.layers.l2_regularizer(l2_reg), scope="afm_out")  # None * 1
        y_deep = tf.reshape(deep_inputs, shape=[-1])  # None

    with tf.variable_scope("AFM_overall"):
        y_bias = Global_Bias * tf.ones_like(y_deep, dtype=tf.float32)
        y = y_bias + y_linear + y_deep
        pred = tf.nn.sigmoid(y)

    # set predictions
    predictions = {"prob": pred}
    export_outputs = {
        tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY:
            tf.estimator.export.PredictOutput(predictions)
    }

    # Provide an estimator spec for `ModeKeys.PREDICT`
    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions, export_outputs=export_outputs)

    #------build loss------
    loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=y, labels=labels)) + \
        l2_reg * tf.nn.l2_loss(Feat_Wgts) + l2_reg * tf.nn.l2_loss(Feat_Emb)

    # Provide an estimator spec for `ModeKeys.EVAL`
    eval_metric_ops = {
        "auc": tf.metrics.auc(labels, pred)
    }
    if mode == tf.estimator.ModeKeys.EVAL:
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions, loss=loss, eval_metric_ops=eval_metric_ops)

    #------build optimizer------
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate, beta1=0.9, beta2=0.999, epsilon=1e-8)
    train_op = optimizer.minimize(loss, global_step=tf.train.get_global_step())

    # Provide an estimator spec for `ModeKeys.TRAIN`
    if mode == tf.estimator.ModeKeys.TRAIN:
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions, loss=loss, train_op=train_op)
def model_fn(features, labels, mode, params): print('params', params) field_size = params['field_size'] embedding_size = params['embedding_size'] feature_size = params['feature_size'] l2_reg = params['l2_reg'] learning_rate = params['learning_rate'] layers = list(map(int, params['deep_layer'].split(','))) dropout = list(map(float, params['dropout'].split(','))) Global_Bias = tf.get_variable('bias', shape=[1], initializer=tf.constant_initializer(0.0)) Feat_Bias = tf.get_variable('linear', shape=[feature_size], initializer=tf.glorot_normal_initializer()) Feat_Emb = tf.get_variable('emb', shape=[feature_size, embedding_size], initializer=tf.glorot_normal_initializer()) feat_ids = features['feat_ids'] feat_ids = tf.reshape(feat_ids, shape=[-1, field_size]) feat_vals = features['feat_vals'] feat_vals = tf.reshape(feat_vals, shape=[-1, field_size]) with tf.variable_scope('Linear-part'): feat_wgts = tf.nn.embedding_lookup(Feat_Bias, feat_ids) y_linear = tf.reduce_sum(tf.multiply(feat_wgts, feat_vals), 1) with tf.variable_scope('BiInter-part'): embedding = tf.nn.embedding_lookup(Feat_Emb, feat_ids) feat_vals = tf.reshape(feat_vals, [-1, field_size, 1]) embedding = tf.multiply(embedding, feat_vals) sum_square_emb = tf.square(tf.reduce_sum(embedding, 1)) square_sum_emb = tf.reduce_sum(tf.square(embedding), 1) deep_input = 0.5 * tf.subtract(sum_square_emb, square_sum_emb) with tf.variable_scope('Deep-part'): if mode == tf.estimator.ModeKeys.TRAIN: train_phase = True else: train_phase = False if mode == tf.estimator.ModeKeys.TRAIN: deep_input = tf.nn.dropout(deep_input, keep_prob=dropout[0]) for i in range(len(layers)): deep_input = tf.contrib.layers.fully_connected( inputs=deep_input, num_outputs=layers[i], weights_regularizer=tf.contrib.layers.l2_regularizer(l2_reg), scope='mlp%d' % i) if FLAGS.batch_norm: deep_input = batch_norm_layer(deep_input, train_phase=train_phase, scope_bn='bn_%d' % i) if mode == tf.estimator.ModeKeys.TRAIN: deep_input = tf.nn.dropout(deep_input, keep_prob=dropout[i + 1]) y_deep = tf.contrib.layers.fully_connected( inputs=deep_input, num_outputs=1, activation_fn=tf.identity, weights_regularizer=tf.contrib.layers.l2_regularizer(l2_reg), scope='deep_out') y_d = tf.reshape(y_deep, [-1]) with tf.variable_scope('NfM-out'): y_bias = Global_Bias * tf.ones_like(y_d, dtype=tf.float32) y = y_bias + y_linear + y_d pred = tf.sigmoid(y) predictions = {'prob': pred} export_outputs = { tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY: tf.estimator.export.PredictOutput(predictions) } if mode == tf.estimator.ModeKeys.PREDICT: return tf.estimator.EstimatorSpec(mode=mode, predictions=pred, export_outputs=export_outputs) loss = tf.reduce_mean( tf.nn.sigmoid_cross_entropy_with_logits(labels=labels, logits=y) + l2_reg * tf.nn.l2_loss(Feat_Bias) + l2_reg * tf.nn.l2_loss(Feat_Emb)) eval_metric_ops = {'auc': tf.metrics.auc(labels, pred)} if mode == tf.estimator.ModeKeys.EVAL: return tf.estimator.EstimatorSpec(mode, predictions=predictions, loss=loss, eval_metric_ops=eval_metric_ops) if FLAGS.optimizer == 'Adam': opt = tf.train.AdamOptimizer(learning_rate, beta1=0.9, beta2=0.999, epsilon=1e-8) elif FLAGS.optimizer == 'Adagrad': opt = tf.train.AdagradOptimizer(learning_rate, initial_accumulator_value=1e-8) elif FLAGS.optimizer == 'Momentum': opt = tf.train.MomentumOptimizer(learning_rate, momentum=0.95) elif FLAGS.optimizer == 'ftrl': opt = tf.train.FtrlOptimizer(learning_rate) train_op = opt.minimize(loss, global_step=tf.train.get_global_step()) if mode == 
tf.estimator.ModeKeys.TRAIN: return tf.estimator.EstimatorSpec(mode, predictions=predictions, loss=loss, train_op=train_op)
def discriminator(inputs, is_train=True, reuse=False): df_dim = 64 # Dimension of discrim filters in first conv layer. [64] w_init = tf.glorot_normal_initializer() gamma_init = tf.random_normal_initializer(1., 0.02) lrelu = lambda x: tf.nn.leaky_relu(x, 0.2) with tf.variable_scope("discriminator", reuse=reuse): net_in = InputLayer(inputs, name='d/in') net_h0 = Conv2d(net_in, df_dim, (5, 5), (2, 2), act=lrelu, padding='SAME', W_init=w_init, name='d/h0/conv2d') net_h1 = Conv2d(net_h0, df_dim * 2, (5, 5), (2, 2), act=None, padding='SAME', W_init=w_init, name='d/h1/conv2d') net_h1 = BatchNormLayer(net_h1, decay=0.9, act=lrelu, is_train=is_train, gamma_init=gamma_init, name='d/h1/batch_norm') net_h2 = Conv2d(net_h1, df_dim * 4, (5, 5), (2, 2), act=None, padding='SAME', W_init=w_init, name='d/h2/conv2d') net_h2 = BatchNormLayer(net_h2, decay=0.9, act=lrelu, is_train=is_train, gamma_init=gamma_init, name='d/h2/batch_norm') net_h3 = Conv2d(net_h2, df_dim * 8, (5, 5), (2, 2), act=None, padding='SAME', W_init=w_init, name='d/h3/conv2d') net_h3 = BatchNormLayer(net_h3, decay=0.9, act=lrelu, is_train=is_train, gamma_init=gamma_init, name='d/h3/batch_norm') net_h4 = FlattenLayer(net_h3, name='d/h4/flatten') net_h4 = DenseLayer(net_h4, n_units=1, act=tf.identity, W_init=w_init, name='d/h4/lin_sigmoid') logits = net_h4.outputs net_h4.outputs = tf.nn.sigmoid(net_h4.outputs) return net_h4, logits
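A hedged sketch of how a TensorLayer discriminator like this is typically reused for real and generated batches in a DCGAN-style setup; real_images and fake_images are assumed tensors, not names from the original code.

# Hypothetical wiring: share weights between the real and fake branches via `reuse`.
net_d_real, logits_real = discriminator(real_images, is_train=True, reuse=False)
net_d_fake, logits_fake = discriminator(fake_images, is_train=True, reuse=True)

# Standard GAN losses on the raw logits.
d_loss_real = tf.reduce_mean(
    tf.nn.sigmoid_cross_entropy_with_logits(logits=logits_real, labels=tf.ones_like(logits_real)))
d_loss_fake = tf.reduce_mean(
    tf.nn.sigmoid_cross_entropy_with_logits(logits=logits_fake, labels=tf.zeros_like(logits_fake)))
d_loss = d_loss_real + d_loss_fake
g_loss = tf.reduce_mean(
    tf.nn.sigmoid_cross_entropy_with_logits(logits=logits_fake, labels=tf.ones_like(logits_fake)))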
def model_fn(features, labels, mode): """ the model_fn feeds into Estimator """ feature_columns = self.create_feature_columns(tf_transform_output) input_layer = tf.feature_column.input_layer( features=features, feature_columns=feature_columns) # Network structure # Batch norm after linear combination and before activation. Dropout after activation. h1 = tf.layers.Dense( units=MODEL_NUM_UNIT_SCALE * 4, activation=None, kernel_initializer=tf.glorot_normal_initializer(), bias_initializer=tf.zeros_initializer() )(input_layer) h1_bn = tf.layers.batch_normalization(h1, training=(mode == tf.estimator.ModeKeys.TRAIN)) h1_act = tf.nn.relu(h1_bn) h1_do = tf.layers.dropout( inputs=h1_act, rate=DROPOUT_PROB, training=(mode == tf.estimator.ModeKeys.TRAIN)) h2 = tf.layers.Dense( units=MODEL_NUM_UNIT_SCALE * 2, activation=None, kernel_initializer=tf.glorot_normal_initializer(), bias_initializer=tf.zeros_initializer() )(h1_do) h2_bn = tf.layers.batch_normalization(h2, training=(mode == tf.estimator.ModeKeys.TRAIN)) h2_act = tf.nn.relu(h2_bn) h2_do = tf.layers.dropout( inputs=h2_act, rate=DROPOUT_PROB, training=(mode == tf.estimator.ModeKeys.TRAIN)) # Head for label1 h30 = tf.layers.Dense( units=MODEL_NUM_UNIT_SCALE, activation=None, kernel_initializer=tf.glorot_normal_initializer(), bias_initializer=tf.zeros_initializer() )(h2_do) h3_bn0 = tf.layers.batch_normalization(h30, training=(mode == tf.estimator.ModeKeys.TRAIN)) h3_act0 = tf.nn.relu(h3_bn0) h3_do0 = tf.layers.dropout( inputs=h3_act0, rate=DROPOUT_PROB, training=(mode == tf.estimator.ModeKeys.TRAIN)) logits0 = tf.layers.Dense( units=2, activation=None, kernel_initializer=tf.glorot_normal_initializer(), bias_initializer=tf.zeros_initializer() )(h3_do0) softmax0 = tf.contrib.layers.softmax(logits0) q_values = tf.div(softmax0[:, 1] - tf.reduce_min(softmax0[:, 1]), tf.reduce_max(softmax0[:, 1]) - tf.reduce_min(softmax0[:, 1])) if mode == tf.estimator.ModeKeys.TRAIN or mode == tf.estimator.ModeKeys.EVAL: labels0 = labels # int64 Notice: use labels but not labels[0], because we only have 1 label now. onehot_labels0 = tf.one_hot(labels0, depth=2) # shape(2,0) should [batch_size, num_classes] , logit should [batch_size, num_classes] # logit(?,2) # `ror_20_days_bool` loss definition: weighting to correct for class imbalances. unweighted_losses0 = tf.losses.softmax_cross_entropy( onehot_labels=onehot_labels0, logits=logits0, reduction=Reduction.NONE) class_weights0 = tf.constant([[1., 1.]]) sample_weights0 = tf.reduce_sum(tf.multiply(onehot_labels0, class_weights0), 1) loss0 = tf.reduce_mean(unweighted_losses0 * sample_weights0) loss = loss0 # Metrics auroc0 = tf.metrics.auc(labels0, softmax0[:, 1], num_thresholds=10000, curve='ROC') prauc0 = tf.metrics.auc(labels0, softmax0[:, 1], num_thresholds=10000, curve='PR', summation_method='careful_interpolation') if mode == tf.estimator.ModeKeys.TRAIN: # MSE loss, optimized with Adam optimizer = tf.train.AdamOptimizer(FIX_LEARNING_RATE) # This is to make sure we also update the rolling mean/var for `tf.layers.batch_normalization` # (which is stored outside of the Estimator scope). update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) with tf.control_dependencies(update_ops): train_op = optimizer.minimize(loss, global_step=tf.train.get_global_step()) # TensorBoard performance metrics. with tf.name_scope('losses'): tf.summary.scalar('loss_ror_20', loss0) # TensorBoard model evolution over time. 
with tf.name_scope('layer_1'): weights = tf.get_default_graph().get_tensor_by_name(os.path.split(h1.name)[0] + '/kernel:0') biases = tf.get_default_graph().get_tensor_by_name(os.path.split(h1.name)[0] + '/bias:0') tf.summary.histogram('weights', weights) tf.summary.histogram('biases', biases) tf.summary.histogram('activations', h1_act) with tf.name_scope('layer_2'): weights = tf.get_default_graph().get_tensor_by_name(os.path.split(h2.name)[0] + '/kernel:0') biases = tf.get_default_graph().get_tensor_by_name(os.path.split(h2.name)[0] + '/bias:0') tf.summary.histogram('weights', weights) tf.summary.histogram('biases', biases) tf.summary.histogram('activations', h2_act) with tf.name_scope('layer_3_ror_20'): weights = tf.get_default_graph().get_tensor_by_name(os.path.split(h30.name)[0] + '/kernel:0') biases = tf.get_default_graph().get_tensor_by_name(os.path.split(h30.name)[0] + '/bias:0') tf.summary.histogram('weights', weights) tf.summary.histogram('biases', biases) tf.summary.histogram('activations', h3_act0) with tf.name_scope('logits_ror_20'): weights = tf.get_default_graph().get_tensor_by_name( os.path.split(logits0.name)[0] + '/kernel:0') biases = tf.get_default_graph().get_tensor_by_name(os.path.split(logits0.name)[0] + '/bias:0') tf.summary.histogram('weights', weights) tf.summary.histogram('biases', biases) tf.summary.histogram('activations', h3_act0) with tf.name_scope('q_values_ror_20'): tf.summary.histogram('q0', softmax0[:, 0]) tf.summary.histogram('q1', softmax0[:, 1]) # Log a few predictions.label0 : ror_xxx_days_bool # to watch the labels and softmax in training label_and_softmax0 = tf.stack([tf.cast(labels0, tf.float32), softmax0[:, 1]], axis=1) logging_hook = tf.train.LoggingTensorHook({ 'label_and_softmax0': label_and_softmax0[0:10, :], # label_and_softmax0 size is batch size in train_config "TRAIN_BATCH_SIZE" }, every_n_iter=LOG_FREQ_STEP) return tf.estimator.EstimatorSpec( mode=mode, loss=loss, train_op=train_op, training_hooks=[logging_hook]) elif mode == tf.estimator.ModeKeys.EVAL: return tf.estimator.EstimatorSpec( mode=mode, loss=loss, # These metrics are computed over the complete eval dataset. eval_metric_ops={ 'metrics_ror_20_days_bool/AUC_ROC': auroc0, 'metrics_ror_20_days_bool/AUC_PR': prauc0, }, predictions={SignatureKeys.PREDICTIONS: q_values}) elif mode == tf.estimator.ModeKeys.PREDICT: """ A policy derived from the Q-value network. This epsilon-greedy policy computes the seeds with the `TOP_SEEDS_K` values and replaces them according to a `epsilon_greedy_probability` probability with a random value in [0, 1000). """ # Indices of top `p.TOP_SEEDS_K` Q-values. top_q_idx = tf.nn.top_k(q_values, k=TOP_SEEDS_K)[1] sel_q_idx = tf.random_shuffle(top_q_idx)[0:SEEDS_K_FINAL] # Since seeds are in [1, `p.SEEDS_K_FINAL`], we have to add 1 to the index. predictions = sel_q_idx + 1 class_labels_ror_20 = tf.reshape( tf.tile(tf.constant(['0', '1']), (tf.shape(softmax0)[0],)), (tf.shape(softmax0)[0], 2)) export_outputs = { # Default output (used in serving-infra) # * output: Seed list. Requires using `SignatureKeys.OUTPUT` dict key, since this is # used by the downstream SRS. # * eps_rnd_selection: Boolean list of whether a random seed (with eps prob) # was recommend or a predicted seed. # * q_values: Q-values for all `SEED_LIST_LENGTH` seeds. 
SignatureDefs.DEFAULT: tf.estimator.export.PredictOutput( {SignatureKeys.OUTPUT: predictions, "q_values": tf.transpose(q_values)}), # Analysis output SignatureDefs.ANALYSIS_ROR_20: tf.estimator.export.ClassificationOutput( scores=softmax0, classes=class_labels_ror_20), SignatureDefs.ANALYSIS_Q: tf.estimator.export.RegressionOutput( value=q_values) } return tf.estimator.EstimatorSpec( mode=mode, predictions={SignatureKeys.PREDICTIONS: q_values}, export_outputs=export_outputs)
def model_fn(features, labels, mode, params, config): """Builds the model function for use in an Estimator. Arguments: features: The input features for the Estimator. labels: The labels, unused here. mode: Signifies whether it is train or test or predict. params: Some hyperparameters as a dictionary. config: The RunConfig, unused here. Returns: EstimatorSpec: A tf.estimator.EstimatorSpec instance. """ del labels, config # Set up the model's learnable parameters. logit_concentration = tf.get_variable( "logit_concentration", shape=[1, params["num_topics"]], initializer=tf.constant_initializer( _softplus_inverse(params["prior_initial_value"]))) concentration = _clip_dirichlet_parameters( tf.nn.softplus(logit_concentration)) num_words = features.shape[1] topics_words_logits = tf.get_variable( "topics_words_logits", shape=[params["num_topics"], num_words], initializer=tf.glorot_normal_initializer()) topics_words = tf.nn.softmax(topics_words_logits, axis=-1) # Compute expected log-likelihood. First, sample from the variational # distribution; second, compute the log-likelihood given the sample. lda_variational = make_lda_variational( params["activation"], params["num_topics"], params["layer_sizes"]) with ed.tape() as variational_tape: _ = lda_variational(features) with ed.tape() as model_tape: with ed.interception( make_value_setter(topics=variational_tape["topics_posterior"])): posterior_predictive = latent_dirichlet_allocation(concentration, topics_words) log_likelihood = posterior_predictive.distribution.log_prob(features) tf.summary.scalar("log_likelihood", tf.reduce_mean(log_likelihood)) # Compute the KL-divergence between two Dirichlets analytically. # The sampled KL does not work well for "sparse" distributions # (see Appendix D of [2]). kl = variational_tape["topics_posterior"].distribution.kl_divergence( model_tape["topics"].distribution) tf.summary.scalar("kl", tf.reduce_mean(kl)) # Ensure that the KL is non-negative (up to a very small slack). # Negative KL can happen due to numerical instability. with tf.control_dependencies([tf.assert_greater(kl, -1e-3, message="kl")]): kl = tf.identity(kl) elbo = log_likelihood - kl avg_elbo = tf.reduce_mean(elbo) tf.summary.scalar("elbo", avg_elbo) loss = -avg_elbo # Perform variational inference by minimizing the -ELBO. global_step = tf.train.get_or_create_global_step() optimizer = tf.train.AdamOptimizer(params["learning_rate"]) # This implements the "burn-in" for prior parameters (see Appendix D of [2]). # For the first prior_burn_in_steps steps they are fixed, and then trained # jointly with the other parameters. grads_and_vars = optimizer.compute_gradients(loss) grads_and_vars_except_prior = [ x for x in grads_and_vars if x[1] != logit_concentration] def train_op_except_prior(): return optimizer.apply_gradients( grads_and_vars_except_prior, global_step=global_step) def train_op_all(): return optimizer.apply_gradients( grads_and_vars, global_step=global_step) train_op = tf.cond( global_step < params["prior_burn_in_steps"], true_fn=train_op_except_prior, false_fn=train_op_all) # The perplexity is an exponent of the average negative ELBO per word. words_per_document = tf.reduce_sum(features, axis=1) log_perplexity = -elbo / words_per_document tf.summary.scalar("perplexity", tf.exp(tf.reduce_mean(log_perplexity))) (log_perplexity_tensor, log_perplexity_update) = tf.metrics.mean( log_perplexity) perplexity_tensor = tf.exp(log_perplexity_tensor) # Obtain the topics summary. Implemented as a py_func for simplicity. 
topics = tf.py_func( functools.partial(get_topics_strings, vocabulary=params["vocabulary"]), [topics_words, concentration], tf.string, stateful=False) tf.summary.text("topics", topics) return tf.estimator.EstimatorSpec( mode=mode, loss=loss, train_op=train_op, eval_metric_ops={ "elbo": tf.metrics.mean(elbo), "log_likelihood": tf.metrics.mean(log_likelihood), "kl": tf.metrics.mean(kl), "perplexity": (perplexity_tensor, log_perplexity_update), "topics": (topics, tf.no_op()), }, )
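The LDA model_fn above relies on two small helpers that are not shown in this snippet. A plausible sketch of what they do (clipping concentrations into a numerically safe range and inverting the softplus), offered as an assumption rather than the exact original code, with numpy assumed imported as np:

def _clip_dirichlet_parameters(x):
    # Keep Dirichlet concentrations in a numerically safe range (assumed bounds).
    return tf.clip_by_value(x, 1e-3, 1e3)

def _softplus_inverse(x):
    # Inverse of softplus: softplus(y) = log(1 + exp(y))  =>  y = log(expm1(x)).
    return np.log(np.expm1(x))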
def icnr_weights(init=tf.glorot_normal_initializer(), scale=2, shape=[3, 3, 32, 4], dtype=tf.float32): sess = tf.Session() return sess.run(ICNR(init, scale=scale)(shape=shape, dtype=dtype))
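ICNR itself is not defined in this snippet. One published formulation (Aitken et al., 2017) initializes a sub-pixel convolution kernel so that the subsequent depth-to-space upsampling starts out equivalent to nearest-neighbour upsampling; the sketch below follows that pattern and is an assumption about what ICNR does here, not the author's exact code.

def ICNR(initializer, scale=2):
    """Sketch of an ICNR wrapper around a base kernel initializer."""
    def _initializer(shape, dtype=tf.float32, partition_info=None):
        # shape = [kernel_h, kernel_w, in_channels, out_channels]
        if scale == 1:
            return initializer(shape, dtype)
        new_shape = shape[:3] + [shape[3] // (scale ** 2)]
        x = initializer(new_shape, dtype)                    # [kh, kw, cin, cout/scale^2]
        x = tf.transpose(x, perm=[2, 0, 1, 3])               # [cin, kh, kw, cout/scale^2]
        x = tf.image.resize_nearest_neighbor(x, size=(shape[0] * scale, shape[1] * scale))
        x = tf.space_to_depth(x, block_size=scale)           # [cin, kh, kw, cout]
        x = tf.transpose(x, perm=[1, 2, 0, 3])               # [kh, kw, cin, cout]
        return x
    return _initializer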
def get_instance(args): # pylint: disable=unused-argument """ create an instance of the initializer """ return tf.glorot_normal_initializer(seed=SEED)
image = tf.reshape(image, (32 * 32 * 3,))
target = tf.one_hot(label, NUM_CLASSES)
min_after_dequeue = 1000
capacity = min_after_dequeue + 3 * BATCH_SIZE
image_batch, target_batch = tf.train.shuffle_batch(
    [image, target], batch_size=BATCH_SIZE, capacity=capacity,
    min_after_dequeue=min_after_dequeue)
# W must map the flattened 32*32*3 image to NUM_CLASSES logits,
# and b needs one entry per class.
W = tf.get_variable("W", shape=(32 * 32 * 3, NUM_CLASSES), initializer=tf.glorot_normal_initializer())
b = tf.get_variable("b", shape=(NUM_CLASSES,), initializer=tf.constant_initializer(0))
logits = tf.nn.xw_plus_b(image_batch, W, b)
ce_loss = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=target_batch))
opt = tf.train.AdamOptimizer().minimize(ce_loss)
init = (tf.global_variables_initializer(), tf.local_variables_initializer())
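Because this snippet feeds the model through tf.train.shuffle_batch, the enqueuing threads have to be started before running the training op; a minimal session loop, with an arbitrary number of steps:

with tf.Session() as sess:
    sess.run(init)
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    try:
        for step in range(1000):  # arbitrary number of steps
            _, loss_val = sess.run([opt, ce_loss])
    finally:
        coord.request_stop()
        coord.join(threads)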
def __init__(self, num_entities, num_relations): super(HyperER, self).__init__() self.entity_dim = 200 self.relation_dim = 200 self.num_entities = num_entities self.num_relations = num_relations self.in_channels = 1 self.out_channels = 32 self.kernal_h = 1 self.kernal_w = 9 self.dense1_size_out = self.in_channels * self.out_channels * self.kernal_h * self.kernal_w self.dense2_size_in = (1 - self.kernal_h + 1) * ( self.entity_dim - self.kernal_w + 1) * self.out_channels # self.inp_drop = 0.2 # self.feature_map_drop = 0.2 # self.hidden_drop = 0.3 self.weights_dense1 = tf.Variable(lambda: tf.glorot_normal_initializer( )([self.relation_dim, self.dense1_size_out])) self.bias_dense1 = tf.Variable(lambda: tf.glorot_normal_initializer() ([self.dense1_size_out])) self.weights_dense2 = tf.Variable(lambda: tf.glorot_normal_initializer( )([self.dense2_size_in, self.entity_dim])) self.bias_dense2 = tf.Variable(lambda: tf.glorot_normal_initializer() ([self.entity_dim])) self.bias_logits = tf.Variable(lambda: tf.glorot_normal_initializer() ([self.num_entities])) # Generate random embedding representaitons for and relations self.embedding_matrix_entities = tf.Variable( lambda: tf.glorot_normal_initializer() ([self.num_entities, self.entity_dim])) self.embedding_matrix_relations = tf.Variable( lambda: tf.glorot_normal_initializer() ([self.num_relations, self.relation_dim])) self.bn0 = tf.keras.layers.BatchNormalization(axis=3, momentum=0.1, epsilon=1e-05) self.bn1 = tf.keras.layers.BatchNormalization(axis=3, momentum=0.1, epsilon=1e-05) self.bn2 = tf.keras.layers.BatchNormalization(axis=1, momentum=0.1, epsilon=1e-05) self.inp_drop = tf.keras.layers.Dropout(0.2) self.feature_map_drop = tf.keras.layers.SpatialDropout2D(0.2) self.hidden_drop = tf.keras.layers.Dropout(0.3) # self.dense1 = tf.keras.layers.Dense(self.dense1_size_out) # self.dense2 = tf.keras.layers.Dense(self.entity_dim) self.add = tf.keras.layers.Add()
def __init__(self, alpha=0.03): self.alpha = alpha self.global_step = tf.train.get_or_create_global_step() self.matrix_init = tf.glorot_normal_initializer() self.zeros_init = tf.constant_initializer(0.)
def classifier_rot(self, x): with tf.variable_scope('classify_rot', reuse=tf.AUTO_REUSE): return tf.layers.dense( x, 4, kernel_initializer=tf.glorot_normal_initializer())
#-*- coding: utf-8 -*- ''' Author: Haoran Chen Initial Date: 9/11/2019 ''' import tensorflow as tf from layers import * global_kwargs = { 'initializer': tf.glorot_normal_initializer(), 'dtype': tf.float32, } class SGRU(): def __init__(self, options): ''' n_w is word embedding dimension. n_h is hidden state dimension. n_f is mid-input dimension. n_v is the size of vocabulary. n_t is the dimension of tagging. n_z is the total video dimension. n_z1 is the ECO dimension. n_z2 is the ResNeXt dimension. ''' self.options = options self.n_w = options.n_w self.n_h = options.n_h self.n_f = options.n_f self.n_t = options.n_t
def model_fn(self, features, labels, mode, params): field_size = params["training"]["field_size"] feature_size = params["training"]["feature_size"] embedding_size = params["training"]["embedding_size"] l2_reg = params["training"]["l2_reg"] learning_rate = params["training"]["learning_rate"] batch_norm = params["training"]["batch_norm"] batch_norm_decay = params["training"]["batch_norm_decay"] optimizer = params["training"]["optimizer"] seed = params["training"]["seed"] metric = params['output']['metric'] layers = params["training"]["deep_layers"] dropout = params["training"]["dropout"] np.random.seed(seed) tf.set_random_seed(seed) fm_bias = tf.get_variable(name='fm_bias', shape=[1], initializer=tf.constant_initializer(0.0)) fm_weight = tf.get_variable(name='fm_weight', shape=[feature_size], initializer=tf.glorot_normal_initializer()) fm_vector = tf.get_variable(name='fm_vector', shape=[feature_size, embedding_size], initializer=tf.glorot_normal_initializer()) with tf.variable_scope("Feature"): feat_ids = features['feat_ids'] feat_ids = tf.reshape(feat_ids, shape=[-1, field_size]) feat_vals = features['feat_vals'] feat_vals = tf.reshape(feat_vals, shape=[-1, field_size]) with tf.variable_scope("First_order"): feat_weights = tf.nn.embedding_lookup(fm_weight, feat_ids) y_w = tf.reduce_sum(tf.multiply(feat_weights, feat_vals), 1) with tf.variable_scope("Second_order"): embeddings = tf.nn.embedding_lookup(fm_vector, feat_ids) feat_vals = tf.reshape(feat_vals, shape=[-1, field_size, 1]) embeddings = tf.multiply(embeddings, feat_vals) sum_square = tf.square(tf.reduce_sum(embeddings, 1)) square_sum = tf.reduce_sum(tf.square(embeddings), 1) y_v = 0.5 * tf.reduce_sum(tf.subtract(sum_square, square_sum), 1) with tf.variable_scope("Deep-part"): if batch_norm: if mode == tf.estimator.ModeKeys.TRAIN: train_phase = True else: train_phase = False deep_inputs = tf.reshape(embeddings, shape=[-1, field_size * embedding_size]) for i in range(len(layers)): deep_inputs = tf.contrib.layers.fully_connected( inputs=deep_inputs, num_outputs=layers[i], weights_regularizer=tf.contrib.layers.l2_regularizer(l2_reg), scope='mlp%d' % i) if batch_norm: deep_inputs = batch_norm_layer( deep_inputs, train_phase=train_phase, scope_bn='bn_%d' % i, batch_norm_decay=batch_norm_decay) if mode == tf.estimator.ModeKeys.TRAIN: deep_inputs = tf.nn.dropout(deep_inputs, keep_prob=dropout[i]) y_deep = tf.contrib.layers.fully_connected( inputs=deep_inputs, num_outputs=1, activation_fn=tf.identity, weights_regularizer=tf.contrib.layers.l2_regularizer(l2_reg), scope='deep_out') y_d = tf.reshape(y_deep, shape=[-1]) with tf.variable_scope("DeepFM-out"): y_bias = fm_bias * tf.ones_like(y_d, dtype=tf.float32) y = y_bias + y_w + y_v + y_d pred = tf.sigmoid(y) predictions = {"probabilities": pred} export_outputs = { tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY: tf.estimator.export.PredictOutput(predictions)} # Provide an estimator spec for `ModeKeys.PREDICT` if mode == tf.estimator.ModeKeys.PREDICT: return tf.estimator.EstimatorSpec( mode=mode, predictions=predictions, export_outputs=export_outputs) with tf.name_scope("Loss"): loss = tf.reduce_mean( tf.nn.sigmoid_cross_entropy_with_logits(logits=y, labels=labels)) + \ l2_reg * tf.nn.l2_loss(fm_weight) + l2_reg * tf.nn.l2_loss(fm_vector) # Provide an estimator spec for `ModeKeys.EVAL` eval_metric_ops = {} if metric == 'auc': eval_metric_ops['auc'] = tf.metrics.auc(labels, pred) else: raise TypeError("Can not find loss_type :", params['training']['loss_type']) if mode == 
tf.estimator.ModeKeys.EVAL: return tf.estimator.EstimatorSpec( mode=mode, predictions=predictions, loss=loss, eval_metric_ops=eval_metric_ops) with tf.name_scope("Optimizer"): if optimizer == 'adam': op = tf.train.AdamOptimizer(learning_rate=learning_rate, beta1=0.9, beta2=0.999, epsilon=1e-8) elif optimizer == 'adagrad': op = tf.train.AdagradOptimizer( learning_rate=learning_rate, initial_accumulator_value=1e-8) elif optimizer == 'momentum': op = tf.train.MomentumOptimizer(learning_rate=learning_rate, momentum=0.95) elif optimizer == 'ftrl': op = tf.train.FtrlOptimizer(learning_rate) else: raise TypeError("Can not find optimizer :", optimizer) train_op = op.minimize(loss, global_step=tf.train.get_global_step()) # Provide an estimator spec for `ModeKeys.TRAIN` modes if mode == tf.estimator.ModeKeys.TRAIN: return tf.estimator.EstimatorSpec( mode=mode, predictions=predictions, loss=loss, train_op=train_op)
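Several of the Estimator snippets above call a batch_norm_layer helper that is not shown. A common definition in these DeepFM/NFM-style code bases wraps tf.contrib.layers.batch_norm in a tf.cond so the same scope is reused between training and inference; a sketch under that assumption:

def batch_norm_layer(x, train_phase, scope_bn, batch_norm_decay=0.9):
    # Build both the training and inference branches over the same variable scope.
    bn_train = tf.contrib.layers.batch_norm(
        x, decay=batch_norm_decay, center=True, scale=True,
        updates_collections=None, is_training=True, reuse=None, scope=scope_bn)
    bn_infer = tf.contrib.layers.batch_norm(
        x, decay=batch_norm_decay, center=True, scale=True,
        updates_collections=None, is_training=False, reuse=True, scope=scope_bn)
    return tf.cond(tf.cast(train_phase, tf.bool), lambda: bn_train, lambda: bn_infer)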
def construct_network(self): """ Constructs a variant of the multi-head attention labeller (MHAL) that does not use keys, queries and values, but only a simple form of additive attention, as proposed by Yang et al. (2016). """ self.word_ids = tf.placeholder(tf.int32, [None, None], name="word_ids") self.char_ids = tf.placeholder(tf.int32, [None, None, None], name="char_ids") self.sentence_lengths = tf.placeholder(tf.int32, [None], name="sentence_lengths") self.word_lengths = tf.placeholder(tf.int32, [None, None], name="word_lengths") self.sentence_labels = tf.placeholder(tf.float32, [None], name="sentence_labels") self.word_labels = tf.placeholder(tf.float32, [None, None], name="word_labels") self.word_objective_weights = tf.placeholder( tf.float32, [None, None], name="word_objective_weights") self.sentence_objective_weights = tf.placeholder( tf.float32, [None], name="sentence_objective_weights") self.learning_rate = tf.placeholder(tf.float32, name="learning_rate") self.is_training = tf.placeholder(tf.int32, name="is_training") self.loss = 0.0 if self.config["initializer"] == "normal": self.initializer = tf.random_normal_initializer(stddev=0.1) elif self.config["initializer"] == "glorot": self.initializer = tf.glorot_uniform_initializer() elif self.config["initializer"] == "xavier": self.initializer = tf.glorot_normal_initializer() zeros_initializer = tf.zeros_initializer() self.word_embeddings = tf.get_variable( name="word_embeddings", shape=[len(self.word2id), self.config["word_embedding_size"]], initializer=(zeros_initializer if self.config["emb_initial_zero"] else self.initializer), trainable=(True if self.config["train_embeddings"] else False)) word_input_tensor = tf.nn.embedding_lookup(self.word_embeddings, self.word_ids) if self.config["char_embedding_size"] > 0 and self.config[ "char_recurrent_size"] > 0: with tf.variable_scope("chars"), tf.control_dependencies([ tf.assert_equal(tf.shape(self.char_ids)[2], tf.reduce_max(self.word_lengths), message="Char dimensions don't match") ]): self.char_embeddings = tf.get_variable( name="char_embeddings", shape=[ len(self.char2id), self.config["char_embedding_size"] ], initializer=self.initializer, trainable=True) char_input_tensor = tf.nn.embedding_lookup( self.char_embeddings, self.char_ids) char_input_tensor_shape = tf.shape(char_input_tensor) char_input_tensor = tf.reshape( char_input_tensor, shape=[ char_input_tensor_shape[0] * char_input_tensor_shape[1], char_input_tensor_shape[2], self.config["char_embedding_size"] ]) _word_lengths = tf.reshape(self.word_lengths, shape=[ char_input_tensor_shape[0] * char_input_tensor_shape[1] ]) char_lstm_cell_fw = tf.nn.rnn_cell.LSTMCell( self.config["char_recurrent_size"], use_peepholes=self.config["lstm_use_peepholes"], state_is_tuple=True, initializer=self.initializer, reuse=False) char_lstm_cell_bw = tf.nn.rnn_cell.LSTMCell( self.config["char_recurrent_size"], use_peepholes=self.config["lstm_use_peepholes"], state_is_tuple=True, initializer=self.initializer, reuse=False) # Concatenate the final forward and the backward character contexts # to obtain a compact character representation for each word. 
_, ((_, char_output_fw), (_, char_output_bw)) = tf.nn.bidirectional_dynamic_rnn( cell_fw=char_lstm_cell_fw, cell_bw=char_lstm_cell_bw, inputs=char_input_tensor, sequence_length=_word_lengths, dtype=tf.float32, time_major=False) char_output_tensor = tf.concat( [char_output_fw, char_output_bw], axis=-1) char_output_tensor = tf.reshape( char_output_tensor, shape=[ char_input_tensor_shape[0], char_input_tensor_shape[1], 2 * self.config["char_recurrent_size"] ]) # Include a char-based language modelling loss, LMc. if self.config["lm_cost_char_gamma"] > 0.0: self.loss += self.config["lm_cost_char_gamma"] * \ self.construct_lm_cost( input_tensor_fw=char_output_tensor, input_tensor_bw=char_output_tensor, sentence_lengths=self.sentence_lengths, target_ids=self.word_ids, lm_cost_type="separate", name="lm_cost_char_separate") if self.config["lm_cost_joint_char_gamma"] > 0.0: self.loss += self.config["lm_cost_joint_char_gamma"] * \ self.construct_lm_cost( input_tensor_fw=char_output_tensor, input_tensor_bw=char_output_tensor, sentence_lengths=self.sentence_lengths, target_ids=self.word_ids, lm_cost_type="joint", name="lm_cost_char_joint") if self.config["char_hidden_layer_size"] > 0: char_output_tensor = tf.layers.dense( inputs=char_output_tensor, units=self.config["char_hidden_layer_size"], activation=tf.tanh, kernel_initializer=self.initializer) if self.config["char_integration_method"] == "concat": word_input_tensor = tf.concat( [word_input_tensor, char_output_tensor], axis=-1) elif self.config["char_integration_method"] == "none": word_input_tensor = word_input_tensor else: raise ValueError("Unknown char integration method") if self.config["dropout_input"] > 0.0: dropout_input = (self.config["dropout_input"] * tf.cast(self.is_training, tf.float32) + (1.0 - tf.cast(self.is_training, tf.float32))) word_input_tensor = tf.nn.dropout(word_input_tensor, dropout_input, name="dropout_word") word_lstm_cell_fw = tf.nn.rnn_cell.LSTMCell( self.config["word_recurrent_size"], use_peepholes=self.config["lstm_use_peepholes"], state_is_tuple=True, initializer=self.initializer, reuse=False) word_lstm_cell_bw = tf.nn.rnn_cell.LSTMCell( self.config["word_recurrent_size"], use_peepholes=self.config["lstm_use_peepholes"], state_is_tuple=True, initializer=self.initializer, reuse=False) with tf.control_dependencies([ tf.assert_equal(tf.shape(self.word_ids)[1], tf.reduce_max(self.sentence_lengths), message="Sentence dimensions don't match") ]): (lstm_outputs_fw, lstm_outputs_bw), ((_, lstm_output_fw), (_, lstm_output_bw)) = \ tf.nn.bidirectional_dynamic_rnn( cell_fw=word_lstm_cell_fw, cell_bw=word_lstm_cell_bw, inputs=word_input_tensor, sequence_length=self.sentence_lengths, dtype=tf.float32, time_major=False) lstm_output_states = tf.concat([lstm_output_fw, lstm_output_bw], axis=-1) if self.config["dropout_word_lstm"] > 0.0: dropout_word_lstm = (self.config["dropout_word_lstm"] * tf.cast(self.is_training, tf.float32) + (1.0 - tf.cast(self.is_training, tf.float32))) lstm_outputs_fw = tf.nn.dropout( lstm_outputs_fw, dropout_word_lstm, noise_shape=tf.convert_to_tensor([ tf.shape(self.word_ids)[0], 1, self.config["word_recurrent_size"] ], dtype=tf.int32)) lstm_outputs_bw = tf.nn.dropout( lstm_outputs_bw, dropout_word_lstm, noise_shape=tf.convert_to_tensor([ tf.shape(self.word_ids)[0], 1, self.config["word_recurrent_size"] ], dtype=tf.int32)) lstm_output_states = tf.nn.dropout(lstm_output_states, dropout_word_lstm) # The forward and backward states are concatenated at every token position. 
lstm_outputs_states = tf.concat([lstm_outputs_fw, lstm_outputs_bw], axis=-1) if self.config["whidden_layer_size"] > 0: lstm_outputs_states = tf.layers.dense( lstm_outputs_states, self.config["whidden_layer_size"], activation=tf.tanh, kernel_initializer=self.initializer) if self.config["model_type"] == "last": processed_tensor = lstm_output_states token_scores = tf.layers.dense( lstm_outputs_states, units=len(self.label2id_tok), kernel_initializer=self.initializer, name="token_scores_last_lstm_outputs_ff") if self.config["hidden_layer_size"] > 0: processed_tensor = tf.layers.dense( processed_tensor, units=self.config["hidden_layer_size"], activation=tf.tanh, kernel_initializer=self.initializer) sentence_scores = tf.layers.dense( processed_tensor, units=len(self.label2id_sent), kernel_initializer=self.initializer, name="sentence_scores_last_lstm_outputs_ff") else: with tf.variable_scope("attention"): token_scores_list = [] sentence_scores_list = [] for i in range(len(self.label2id_tok)): keys = tf.layers.dense( lstm_outputs_states, units=self.config["attention_evidence_size"], activation=tf.tanh, kernel_initializer=self.initializer) values = tf.layers.dense( lstm_outputs_states, units=self.config["attention_evidence_size"], activation=tf.tanh, kernel_initializer=self.initializer) token_scores_head = tf.layers.dense( keys, units=1, kernel_initializer=self.initializer) # [B, M, 1] token_scores_head = tf.reshape( token_scores_head, shape=tf.shape(self.word_ids)) # [B, M] token_scores_list.append(token_scores_head) if self.config["attention_activation"] == "sharp": attention_weights_unnormalized = tf.exp( token_scores_head) elif self.config["attention_activation"] == "soft": attention_weights_unnormalized = tf.sigmoid( token_scores_head) elif self.config["attention_activation"] == "linear": attention_weights_unnormalized = token_scores_head else: raise ValueError( "Unknown/unsupported token scoring method: %s" % self.config["attention_activation"]) attention_weights_unnormalized = tf.where( tf.sequence_mask(self.sentence_lengths), attention_weights_unnormalized, tf.zeros_like(attention_weights_unnormalized)) attention_weights = attention_weights_unnormalized / tf.reduce_sum( attention_weights_unnormalized, axis=1, keep_dims=True) # [B, M] processed_tensor = tf.reduce_sum( values * attention_weights[:, :, numpy.newaxis], axis=1) # [B, E] if self.config["hidden_layer_size"] > 0: processed_tensor = tf.layers.dense( processed_tensor, units=self.config["hidden_layer_size"], activation=tf.tanh, kernel_initializer=self.initializer) sentence_score_head = tf.layers.dense( processed_tensor, units=1, kernel_initializer=self.initializer, name="output_ff_head_%d" % i) # [B, 1] sentence_score_head = tf.reshape( sentence_score_head, shape=[tf.shape(processed_tensor)[0]]) # [B] sentence_scores_list.append(sentence_score_head) token_scores = tf.stack(token_scores_list, axis=-1) # [B, M, H] all_sentence_scores = tf.stack(sentence_scores_list, axis=-1) # [B, H] if len(self.label2id_tok) != len(self.label2id_sent): if len(self.label2id_sent) == 2: default_sentence_score = tf.gather(all_sentence_scores, indices=[0], axis=1) # [B, 1] maximum_non_default_sentence_score = tf.gather( all_sentence_scores, indices=list(range(1, len(self.label2id_tok))), axis=1) # [B, num_heads-1] maximum_non_default_sentence_score = tf.reduce_max( maximum_non_default_sentence_score, axis=1, keep_dims=True) # [B, 1] sentence_scores = tf.concat( [ default_sentence_score, maximum_non_default_sentence_score ], axis=-1, 
name="sentence_scores_concatenation") # [B, 2] else: sentence_scores = tf.layers.dense( all_sentence_scores, units=len(self.label2id_sent), kernel_initializer=self.initializer ) # [B, num_sent_labels] else: sentence_scores = all_sentence_scores # Mask the token scores that do not fall in the range of the true sentence length. # Do this for each head (change shape from [B, M] to [B, M, num_heads]). tiled_sentence_lengths = tf.tile( input=tf.expand_dims(tf.sequence_mask(self.sentence_lengths), axis=-1), multiples=[1, 1, len(self.label2id_tok)]) self.token_probabilities = tf.nn.softmax(token_scores, axis=-1) self.token_probabilities = tf.where( tiled_sentence_lengths, self.token_probabilities, tf.zeros_like(self.token_probabilities)) self.token_predictions = tf.argmax(self.token_probabilities, axis=2) self.sentence_probabilities = tf.nn.softmax(sentence_scores) self.sentence_predictions = tf.argmax(self.sentence_probabilities, axis=1) if self.config["word_objective_weight"] > 0: word_objective_loss = tf.nn.sparse_softmax_cross_entropy_with_logits( logits=token_scores, labels=tf.cast(self.word_labels, tf.int32)) word_objective_loss = tf.where( tf.sequence_mask(self.sentence_lengths), word_objective_loss, tf.zeros_like(word_objective_loss)) self.loss += self.config["word_objective_weight"] * tf.reduce_sum( self.word_objective_weights * word_objective_loss) if self.config["sentence_objective_weight"] > 0: self.loss += self.config[ "sentence_objective_weight"] * tf.reduce_sum( self.sentence_objective_weights * tf.nn.sparse_softmax_cross_entropy_with_logits( logits=sentence_scores, labels=tf.cast(self.sentence_labels, tf.int32))) max_over_token_heads = tf.reduce_max(self.token_probabilities, axis=1) # [B, H] one_hot_sentence_labels = tf.one_hot(tf.cast(self.sentence_labels, tf.int32), depth=len(self.label2id_sent)) if self.config["enable_label_smoothing"]: one_hot_sentence_labels_smoothed = label_smoothing( one_hot_sentence_labels, epsilon=self.config["smoothing_epsilon"]) else: one_hot_sentence_labels_smoothed = one_hot_sentence_labels # At least one token has a label corresponding to the true sentence label. # This loss also pushes the maximums over the other heads towards 0 (but smoothed). if self.config["type1_attention_objective_weight"] > 0: this_max_over_token_heads = max_over_token_heads if len(self.label2id_tok) != len(self.label2id_sent): if len(self.label2id_sent) == 2: max_default_head = tf.gather(max_over_token_heads, indices=[0], axis=-1) # [B, 1] max_non_default_head = tf.reduce_max( tf.gather(max_over_token_heads, indices=list(range(1, len(self.label2id_tok))), axis=-1), axis=1, keep_dims=True) # [B, 1] this_max_over_token_heads = tf.concat( [max_default_head, max_non_default_head], axis=-1) # [B, 2] else: raise ValueError( "Unsupported attention loss for num_heads != num_sent_lables " "and num_sentence_labels != 2.") self.loss += self.config["type1_attention_objective_weight"] * ( tf.reduce_sum(self.sentence_objective_weights * tf.reduce_sum( tf.square(this_max_over_token_heads - one_hot_sentence_labels_smoothed), axis=-1))) # The predicted distribution over the token labels (heads) should be similar to the # predicted distribution over the sentence representations. 
if self.config["type2_attention_objective_weight"] > 0: all_sentence_scores_probabilities = tf.nn.softmax( all_sentence_scores) # [B, H] self.loss += self.config["type2_attention_objective_weight"] * ( tf.reduce_sum(self.sentence_objective_weights * tf.reduce_sum( tf.square(max_over_token_heads - all_sentence_scores_probabilities), axis=-1))) # At least one token has a label corresponding to the true sentence label. if self.config["type3_attention_objective_weight"] > 0: this_max_over_token_heads = max_over_token_heads if len(self.label2id_tok) != len(self.label2id_sent): if len(self.label2id_sent) == 2: max_default_head = tf.gather(max_over_token_heads, indices=[0], axis=-1) # [B, 1] max_non_default_head = tf.reduce_max( tf.gather(max_over_token_heads, indices=list(range(1, len(self.label2id_tok))), axis=-1), axis=1, keep_dims=True) # [B, 1] this_max_over_token_heads = tf.concat( [max_default_head, max_non_default_head], axis=-1) # [B, 2] else: raise ValueError( "Unsupported attention loss for num_heads != num_sent_lables " "and num_sentence_labels != 2.") self.loss += self.config["type3_attention_objective_weight"] * ( tf.reduce_sum( self.sentence_objective_weights * tf.reduce_sum(tf.square( (this_max_over_token_heads * one_hot_sentence_labels) - one_hot_sentence_labels_smoothed), axis=-1))) # A sentence that has a default label, should only contain tokens labeled as default. if self.config["type4_attention_objective_weight"] > 0: default_head = tf.gather(self.token_probabilities, indices=[0], axis=-1) # [B, M, 1] default_head = tf.squeeze(default_head, axis=-1) # [B, M] self.loss += self.config["type4_attention_objective_weight"] * ( tf.reduce_sum( self.sentence_objective_weights * tf.cast(tf.equal(self.sentence_labels, 0.0), tf.float32) * tf.reduce_sum( tf.square(default_head - tf.ones_like(default_head)), axis=-1))) # Every sentence has at least one default label. if self.config["type5_attention_objective_weight"] > 0: default_head = tf.gather(self.token_probabilities, indices=[0], axis=-1) # [B, M, 1] max_default_head = tf.reduce_max(tf.squeeze(default_head, axis=-1), axis=-1) # [B] self.loss += self.config["type5_attention_objective_weight"] * ( tf.reduce_sum(self.sentence_objective_weights * tf.square(max_default_head - tf.ones_like(max_default_head)))) # Include a word-based language modelling loss, LMw. if self.config["lm_cost_lstm_gamma"] > 0.0: self.loss += self.config[ "lm_cost_lstm_gamma"] * self.construct_lm_cost( input_tensor_fw=lstm_outputs_fw, input_tensor_bw=lstm_outputs_bw, sentence_lengths=self.sentence_lengths, target_ids=self.word_ids, lm_cost_type="separate", name="lm_cost_lstm_separate") if self.config["lm_cost_joint_lstm_gamma"] > 0.0: self.loss += self.config[ "lm_cost_joint_lstm_gamma"] * self.construct_lm_cost( input_tensor_fw=lstm_outputs_fw, input_tensor_bw=lstm_outputs_bw, sentence_lengths=self.sentence_lengths, target_ids=self.word_ids, lm_cost_type="joint", name="lm_cost_lstm_joint") self.train_op = self.construct_optimizer( opt_strategy=self.config["opt_strategy"], loss=self.loss, learning_rate=self.learning_rate, clip=self.config["clip"]) print("Notwork built.")
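The network above calls a label_smoothing helper when enable_label_smoothing is set; the usual definition mixes the one-hot targets with a uniform distribution. A sketch, assuming that standard form:

def label_smoothing(labels, epsilon=0.1):
    # labels: one-hot tensor of shape [..., num_classes]
    num_classes = tf.cast(tf.shape(labels)[-1], tf.float32)
    return (1.0 - epsilon) * labels + epsilon / num_classes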
size = 400 inp = tf.placeholder(tf.float32, shape=(None, 5)) seed = tf.get_variable('seed', (5, ), initializer=tf.random_uniform_initializer(minval=-10, maxval=10)) batch_size = tf.shape(inp)[0] h = tf.concat([inp, tf.tile(tf.expand_dims(seed, 0), [batch_size, 1])], 1) h = tf.layers.dense(h, 20, activation=tf.nn.sigmoid, kernel_initializer=tf.glorot_normal_initializer(), bias_initializer=tf.random_uniform_initializer( minval=-0.1, maxval=0.1)) * 100 h = tf.layers.dense(h, 10, activation=tf.nn.sigmoid, kernel_initializer=tf.glorot_normal_initializer(), bias_initializer=tf.random_uniform_initializer( minval=-0.1, maxval=0.1)) * 8 h = tf.layers.dense(h, 1, activation=tf.sin, kernel_initializer=tf.glorot_normal_initializer(), bias_initializer=tf.glorot_normal_initializer()) if True:
def masked_dense(inputs, units, num_blocks=None, exclusive=False, kernel_initializer=None, reuse=None, name=None, *args, # pylint: disable=keyword-arg-before-vararg **kwargs): """A autoregressively masked dense layer. Analogous to `tf.layers.dense`. See [Germain et al. (2015)][1] for detailed explanation. Arguments: inputs: Tensor input. units: Python `int` scalar representing the dimensionality of the output space. num_blocks: Python `int` scalar representing the number of blocks for the MADE masks. exclusive: Python `bool` scalar representing whether to zero the diagonal of the mask, used for the first layer of a MADE. kernel_initializer: Initializer function for the weight matrix. If `None` (default), weights are initialized using the `tf.glorot_random_initializer`. reuse: Python `bool` scalar representing whether to reuse the weights of a previous layer by the same name. name: Python `str` used to describe ops managed by this function. *args: `tf.layers.dense` arguments. **kwargs: `tf.layers.dense` keyword arguments. Returns: Output tensor. Raises: NotImplementedError: if rightmost dimension of `inputs` is unknown prior to graph execution. #### References [1]: Mathieu Germain, Karol Gregor, Iain Murray, and Hugo Larochelle. MADE: Masked Autoencoder for Distribution Estimation. In _International Conference on Machine Learning_, 2015. https://arxiv.org/abs/1502.03509 """ # TODO(b/67594795): Better support of dynamic shape. input_depth = inputs.shape.with_rank_at_least(1)[-1].value if input_depth is None: raise NotImplementedError( "Rightmost dimension must be known prior to graph execution.") mask = _gen_mask(num_blocks, input_depth, units, MASK_EXCLUSIVE if exclusive else MASK_INCLUSIVE).T if kernel_initializer is None: kernel_initializer = tf.glorot_normal_initializer() def masked_initializer(shape, dtype=None, partition_info=None): return mask * kernel_initializer(shape, dtype, partition_info) with tf.name_scope(name, "masked_dense", [inputs, units, num_blocks]): layer = layers.Dense( units, kernel_initializer=masked_initializer, kernel_constraint=lambda x: mask * x, name=name, dtype=inputs.dtype.base_dtype, _scope=name, _reuse=reuse, *args, # pylint: disable=keyword-arg-before-vararg **kwargs) return layer.apply(inputs)
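A short usage sketch for masked_dense as it would appear in a two-layer MADE-style network; the event size, hidden width, and the shift/log-scale interpretation of the outputs are illustrative assumptions.

event_size = 5     # illustrative input dimensionality
hidden_units = 32  # illustrative hidden width
x = tf.placeholder(tf.float32, shape=[None, event_size])

# exclusive=True zeroes the mask diagonal so the first layer cannot see the current dimension.
h = masked_dense(inputs=x, units=hidden_units, num_blocks=event_size,
                 exclusive=True, activation=tf.nn.relu)
# Two outputs per input dimension (e.g. shift and log-scale of an affine flow).
params = masked_dense(inputs=h, units=event_size * 2, num_blocks=event_size)
params = tf.reshape(params, shape=[-1, event_size, 2])
shift, log_scale = tf.unstack(params, num=2, axis=-1)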
def model_fn(features, labels, mode, params):
    """Build Model function f(x) for Estimator."""
    #------hyper parameters------
    field_size = params['field_size']
    feature_size = params['feature_size']
    embedding_size = params['embedding_size']
    l2_reg = params['l2_reg']
    learning_rate = params['learning_rate']
    dropout = params['dropout']
    layers = params['layers']

    #------build weights------
    Global_Bias = tf.get_variable(name='bias', shape=[1], initializer=tf.constant_initializer(0.0))
    Feat_Wgts = tf.get_variable(name='linear', shape=[feature_size], initializer=tf.glorot_normal_initializer())
    Feat_Emb = tf.get_variable(name='emb', shape=[feature_size, embedding_size], initializer=tf.glorot_normal_initializer())

    #------build feature------
    feat_ids = features['feat_ids']
    feat_ids = tf.reshape(feat_ids, shape=[-1, field_size])
    feat_vals = features['feat_vals']
    feat_vals = tf.reshape(feat_vals, shape=[-1, field_size])

    #------build f(x)------
    # f(x) = bias + sum(wx) + MLP(BI(embed_vec))
    # FM part
    with tf.variable_scope("Linear-part"):
        feat_wgts = tf.nn.embedding_lookup(Feat_Wgts, feat_ids)  # None * F * 1
        y_linear = tf.reduce_sum(tf.multiply(feat_wgts, feat_vals), 1)  # None * 1

    with tf.variable_scope("BiInter-part"):
        embeddings = tf.nn.embedding_lookup(Feat_Emb, feat_ids)  # None * F * k
        feat_vals = tf.reshape(feat_vals, shape=[-1, field_size, 1])  # None * F * 1
        embeddings = tf.multiply(embeddings, feat_vals)  # vi * xi
        sum_square_emb = tf.square(tf.reduce_sum(embeddings, 1))
        square_sum_emb = tf.reduce_sum(tf.square(embeddings), 1)
        deep_inputs = 0.5 * tf.subtract(sum_square_emb, square_sum_emb)  # None * k

    with tf.variable_scope("Deep-part"):
        if mode == tf.estimator.ModeKeys.TRAIN:
            train_phase = True
        else:
            train_phase = False

        # The BI output goes through Batch Normalization ...
        deep_inputs = batch_norm_layer(deep_inputs, train_phase=train_phase, scope_bn="bn_after_bi")
        # ... followed by Dropout.
        if mode == tf.estimator.ModeKeys.TRAIN:
            deep_inputs = tf.nn.dropout(deep_inputs, keep_prob=dropout[-1])  # dropout at bilinear interaction layer

        for i in range(len(layers)):
            deep_inputs = tf.contrib.layers.fully_connected(inputs=deep_inputs, num_outputs=layers[i],
                                                            weights_regularizer=tf.contrib.layers.l2_regularizer(l2_reg),
                                                            scope="mlp%d" % i)
            # Note: Batch Norm first, then Dropout.
            # Batch Normalization
            deep_inputs = batch_norm_layer(deep_inputs, train_phase=train_phase, scope_bn="bn%d" % i)
            # Dropout
            if mode == tf.estimator.ModeKeys.TRAIN:
                deep_inputs = tf.nn.dropout(deep_inputs, keep_prob=dropout[i])

        # Output
        y_deep = tf.contrib.layers.fully_connected(inputs=deep_inputs, num_outputs=1, activation_fn=tf.identity,
                                                   weights_regularizer=tf.contrib.layers.l2_regularizer(l2_reg),
                                                   scope="deep_out")
        y_d = tf.reshape(y_deep, shape=[-1])

    with tf.variable_scope("NFM-out"):
        y_bias = Global_Bias * tf.ones_like(y_d, dtype=tf.float32)
        y = y_bias + y_linear + y_d
        pred = tf.sigmoid(y)

    predictions = {"prob": pred}
    export_outputs = {tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY:
                          tf.estimator.export.PredictOutput(predictions)}
    # Provide an estimator spec for `ModeKeys.PREDICT`
    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions, export_outputs=export_outputs)

    #------build loss------
    loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=y, labels=labels)) + \
        l2_reg * tf.nn.l2_loss(Feat_Wgts) + l2_reg * tf.nn.l2_loss(Feat_Emb)

    # Provide an estimator spec for `ModeKeys.EVAL`
    eval_metric_ops = {"auc": tf.metrics.auc(labels, pred)}
    if mode == tf.estimator.ModeKeys.EVAL:
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions, loss=loss,
                                          eval_metric_ops=eval_metric_ops)

    #------build optimizer------
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate, beta1=0.9, beta2=0.999, epsilon=1e-8)
    train_op = optimizer.minimize(loss, global_step=tf.train.get_global_step())

    # Provide an estimator spec for `ModeKeys.TRAIN` modes
    if mode == tf.estimator.ModeKeys.TRAIN:
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions, loss=loss, train_op=train_op)
if not os.path.exists(TRAINLOG): os.makedirs(TRAINLOG) if not os.path.exists(TESTLOG): os.makedirs(TESTLOG) # general configs epochs = 1 batch_size = 128 # network configs config = dict() config["tf"] = dict() config["tf"]["dense"] = {"activation": tf.nn.relu, "use_bias": True, "kernel_initializer": tf.glorot_normal_initializer(), "bias_initializer": tf.zeros_initializer(), "kernel_regularizer": None, "bias_regularizer": None, "activity_regularizer": None, "kernel_constraint": None, "bias_constraint": None, "trainable": True, "name": None, "reuse": None} config["tf"]["conv2d"] = {"activation": tf.nn.relu, "strides": (1, 1), "padding": "same", "data_format": "channels_last", "dilation_rate": (1, 1),
import gym
import tensorflow as tf

env = gym.make('CartPole-v0')
env.reset()

n_inputs = 4  # == env.observation_space.shape[0]
n_hidden1 = 10
n_hidden2 = 10
n_outputs = 1  # only outputs the probability of accelerating left
learning_rate = 0.01

X = tf.placeholder(tf.float32, shape=[None, n_inputs])
y = tf.placeholder(tf.float32, shape=[None, n_outputs])

hidden1 = tf.layers.dense(X, n_hidden1, activation=tf.nn.relu,
                          kernel_initializer=tf.glorot_normal_initializer())
hidden2 = tf.layers.dense(hidden1, n_hidden2, activation=tf.nn.relu,
                          kernel_initializer=tf.glorot_normal_initializer())
logits = tf.layers.dense(hidden2, n_outputs)
outputs = tf.nn.sigmoid(logits)

p_left_and_right = tf.concat(axis=1, values=[outputs, 1 - outputs])
action = tf.multinomial(tf.log(p_left_and_right), num_samples=1)

cross_entropy = tf.nn.sigmoid_cross_entropy_with_logits(labels=y, logits=logits)
optimizer = tf.train.AdamOptimizer(learning_rate)
training_op = optimizer.minimize(cross_entropy)
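The CartPole snippet above only defines the network and a cross-entropy training op; in the usual policy-gradient recipe the per-step gradients are later scaled by discounted, normalized rewards. A sketch of the standard discounting helpers, assuming numpy is available:

import numpy as np

def discount_rewards(rewards, discount_rate=0.95):
    # Discounted return G_t = r_t + gamma * G_{t+1}, computed backwards over one episode.
    discounted = np.zeros(len(rewards))
    cumulative = 0.0
    for step in reversed(range(len(rewards))):
        cumulative = rewards[step] + discount_rate * cumulative
        discounted[step] = cumulative
    return discounted

def discount_and_normalize_rewards(all_rewards, discount_rate=0.95):
    # Normalize across all episodes so good actions get positive weight, bad ones negative.
    all_discounted = [discount_rewards(r, discount_rate) for r in all_rewards]
    flat = np.concatenate(all_discounted)
    mean, std = flat.mean(), flat.std()
    return [(d - mean) / std for d in all_discounted]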
def train(self, batch_data, is_train=True):
    """ 1 Define the input data """
    print("1 Define the input data")
    with tf.name_scope('input_data'):
        # Labels: [batch_size, 1]
        labels = batch_data['labels']
        # User feature vector: [batch_size, feature_size]
        dense_vector = tf.reshape(batch_data['dense_vector'],
                                  shape=[-1, self.feature_size])  # None * feature_size
        print("%s: %s" % ("dense_vector", dense_vector))
        print("%s: %s" % ("labels", labels))

    """ 2 Output of the FM layer """
    print("2 Output of the FM layer")
    with tf.name_scope("FM"):
        # FM parameters: create or fetch W and V
        with tf.variable_scope("fm_layer", reuse=tf.AUTO_REUSE):
            self.FM_W = tf.get_variable(name='fm_w', shape=[self.feature_size, 1],
                                        initializer=tf.glorot_normal_initializer())
            self.FM_V = tf.get_variable(name='fm_v', shape=[self.feature_size, self.fm_v_size],
                                        initializer=tf.glorot_normal_initializer())
        print("%s: %s" % ("FM_W", self.FM_W))
        print("%s: %s" % ("FM_V", self.FM_V))

        # Prepare the input samples
        Input_x = tf.reshape(dense_vector, shape=[-1, self.feature_size, 1])  # None * feature_size
        print("%s: %s" % ("Input_x", Input_x))

        # ---------- W * X ----------
        Y_first = tf.reduce_sum(tf.multiply(self.FM_W, Input_x), 2)  # None * F
        ## Add dropout to prevent overfitting
        if is_train and self.is_dropout_fm:
            Y_first = tf.nn.dropout(Y_first, self.dropout_fm[0])  # None * F
        print("%s: %s" % ("Y_first", Y_first))

        # ---------- Vij * Wij ---------------
        # sum_square part
        embeddings = tf.multiply(self.FM_V, Input_x)  # None * V * X
        print("%s: %s" % ("embeddings", embeddings))
        summed_features_emb = tf.reduce_sum(embeddings, 1)  # sum(v*x)
        summed_features_emb_square = tf.square(summed_features_emb)  # (sum(v*x))^2
        # square_sum part
        squared_features_emb = tf.square(embeddings)  # (v*x)^2
        squared_sum_features_emb = tf.reduce_sum(squared_features_emb, 1)  # sum((v*x)^2)
        # second order
        Y_second = 0.5 * tf.subtract(summed_features_emb_square,
                                     squared_sum_features_emb)  # 0.5*((sum(v*x))^2 - sum((v*x)^2))
        if is_train and self.is_dropout_fm:
            Y_second = tf.nn.dropout(Y_second, self.dropout_fm[1])  # None * K
        print("%s: %s" % ("Y_second", Y_second))

        # Regularization, L2 by default
        if self.reg_type == 'l1_reg':
            lr_regularization = tf.reduce_sum(tf.abs(self.FM_W))
            fm_regularization = tf.reduce_sum(tf.abs(self.FM_V))
        elif self.reg_type == 'l2_reg':
            lr_regularization = tf.nn.l2_loss(self.FM_W)
            fm_regularization = tf.nn.l2_loss(self.FM_V)
        else:
            lr_regularization = tf.nn.l2_loss(self.FM_W)
            fm_regularization = tf.nn.l2_loss(self.FM_V)

    """ 3 Output of the Deep layers """
    print("3 Output of the Deep layers")
    with tf.name_scope("Deep"):
        # First layer
        print("lay%s, input_size: %s, output_size: %s, active_fuc: %s" %
              (1, self.feature_size * self.fm_v_size, self.dnn_layer[0], self.dnn_active_fuc[0]))
        with tf.variable_scope("deep_layer1", reuse=tf.AUTO_REUSE):
            input_size = self.feature_size * self.fm_v_size
            output_size = self.dnn_layer[0]
            deep_inputs = tf.reshape(embeddings, shape=[-1, input_size])  # None * (F*K)
            print("%s: %s" % ("lay1, deep_inputs", deep_inputs))
            # Input dropout
            if is_train and self.is_dropout_dnn:
                deep_inputs = tf.nn.dropout(deep_inputs, self.dropout_dnn[0])
            # Fully connected layer
            deep_outputs = self._udf_full_connect(deep_inputs, input_size, output_size,
                                                  self.dnn_active_fuc[0])
            print("%s: %s" % ("lay1, deep_outputs", deep_outputs))
            # batch_norm
            if self.is_batch_norm:
                deep_outputs = tf.layers.batch_normalization(deep_outputs, axis=-1, training=is_train)
            # Output dropout
            if is_train and self.is_dropout_dnn:
                deep_outputs = tf.nn.dropout(deep_outputs, self.dropout_dnn[1])

        # Intermediate layers
        for i in range(len(self.dnn_layer) - 1):
            with tf.variable_scope("deep_layer%d" % (i + 2), reuse=tf.AUTO_REUSE):
                print("lay%s, input_size: %s, output_size: %s, active_fuc: %s" %
                      (i + 2, self.dnn_layer[i], self.dnn_layer[i + 1], self.dnn_active_fuc[i + 1]))
                # Fully connected layer
                deep_outputs = self._udf_full_connect(deep_outputs, self.dnn_layer[i],
                                                      self.dnn_layer[i + 1], self.dnn_active_fuc[i + 1])
                print("lay%s, deep_outputs: %s" % (i + 2, deep_outputs))
                # batch_norm
                if self.is_batch_norm:
                    deep_outputs = tf.layers.batch_normalization(deep_outputs, axis=-1, training=is_train)
                # Output dropout
                if is_train and self.is_dropout_dnn:
                    deep_outputs = tf.nn.dropout(deep_outputs, self.dropout_dnn[i + 2])

        # Output layer
        print("lay_last, input_size: %s, output_size: %s, active_fuc: %s" %
              (self.dnn_layer[-1], 1, self.dnn_active_fuc[-1]))
        with tf.variable_scope("deep_layer%d" % (len(self.dnn_layer) + 1), reuse=tf.AUTO_REUSE):
            deep_outputs = self._udf_full_connect(deep_outputs, self.dnn_layer[-1], 1,
                                                  self.dnn_active_fuc[-1])
            print("lay_last, deep_outputs: %s" % (deep_outputs))

        # Regularization, L2 by default
        dnn_regularization = 0.0
        for j in range(len(self.dnn_layer) + 1):
            with tf.variable_scope("deep_layer%d" % (j + 1), reuse=True):
                weights = tf.get_variable("weights")
                if self.reg_type == 'l1_reg':
                    dnn_regularization = dnn_regularization + tf.reduce_sum(tf.abs(weights))
                elif self.reg_type == 'l2_reg':
                    dnn_regularization = dnn_regularization + tf.nn.l2_loss(weights)
                else:
                    dnn_regularization = dnn_regularization + tf.nn.l2_loss(weights)

        # Deep output
        Y_deep = deep_outputs
        print("%s: %s" % ("Y_deep", Y_deep))

    """ 4 Output of the DeepFM layer """
    print("4 Output of the DeepFM layer")
    # ---------- DeepFM ----------
    with tf.name_scope("Deep_FM"):
        # Prepare the input of the last layer
        concat_input = tf.concat([Y_first, Y_second, Y_deep], axis=1)
        if self.model_type == "deep_fm":
            concat_input = tf.concat([Y_first, Y_second, Y_deep], axis=1)
            print("%s: %s" % ("concat_input", concat_input))
            input_size = self.feature_size + self.fm_v_size + self.dnn_layer[-1]
            regularization = self.reg_w * lr_regularization + self.reg_v * fm_regularization + self.reg_dnn * dnn_regularization
        elif self.model_type == "fm":
            concat_input = tf.concat([Y_first, Y_second], axis=1)
            print("%s: %s" % ("concat_input", concat_input))
            input_size = self.feature_size + self.fm_v_size
            regularization = self.reg_w * lr_regularization + self.reg_v * fm_regularization
        elif self.model_type == "dnn":
            concat_input = Y_deep
            print("%s: %s" % ("concat_input", concat_input))
            input_size = self.dnn_layer[-1]
            regularization = self.reg_dnn * dnn_regularization
        elif self.model_type == "lr":
            concat_input = tf.concat([Y_first], axis=1)
            print("%s: %s" % ("concat_input", concat_input))
            input_size = self.feature_size
            regularization = self.reg_w * lr_regularization
        else:
            concat_input = tf.concat([Y_first, Y_second, Y_deep], axis=1)
            print("%s: %s" % ("concat_input", concat_input))
            input_size = self.feature_size + self.fm_v_size + self.dnn_layer[-1]
            regularization = self.reg_w * lr_regularization + self.reg_v * fm_regularization + self.reg_dnn * dnn_regularization

        # Output of the last layer: a fully connected w*concat_input + b;
        # alternatively, concat_input can simply be summed.
        with tf.variable_scope("deepfm_out", reuse=tf.AUTO_REUSE):
            self.DF_W = tf.get_variable(name='df_w', shape=[input_size, 1],
                                        initializer=tf.glorot_normal_initializer())
            self.DF_B = tf.get_variable(name='df_bias', shape=[1],
                                        initializer=tf.constant_initializer(0.0))
        print("%s: %s" % ("DF_W", self.DF_W))
        print("%s: %s" % ("DF_B", self.DF_B))
        print("%s: %s" % ("out_lay_type", self.out_lay_type))
        if self.out_lay_type == "line":
            Y_sum = tf.reduce_sum(concat_input, 1)  # None * 1
            print("%s: %s" % ("Y_sum", Y_sum))
            Y_bias = self.DF_B * tf.ones_like(Y_sum, dtype=tf.float32)
            print("%s: %s" % ("Y_bias", Y_bias))
            Y_Out = tf.add(Y_sum, Y_bias, name='Y_Out')
        elif self.out_lay_type == "matmul":
            Y_Out = tf.add(tf.matmul(concat_input, self.DF_W), self.DF_B, name='Y_Out')
        else:
            Y_sum = tf.reduce_sum(concat_input, 1)  # None * 1
            print("%s: %s" % ("Y_sum", Y_sum))
            Y_bias = self.DF_B * tf.ones_like(Y_sum, dtype=tf.float32)
            print("%s: %s" % ("Y_bias", Y_bias))
            Y_Out = tf.add(Y_sum, Y_bias, name='Y_Out')
        print("%s: %s" % ("Y_Out", Y_Out))
        score = tf.nn.sigmoid(Y_Out, name='score')
        score = tf.reshape(score, shape=[-1, 1])
        print("%s: %s" % ("score", score))

    """ 5 Define the loss function and the AUC metric """
    print("5 Define the loss function and the AUC metric")
    with tf.name_scope("loss"):
        # loss: Squared_error, Cross_entropy, FTLR
        if self.loss_fuc == 'Squared_error':
            loss = tf.reduce_mean(tf.reduce_sum(tf.square(labels - score),
                                                reduction_indices=[1])) + regularization
        elif self.loss_fuc == 'Cross_entropy':
            loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(
                logits=tf.reshape(Y_Out, [-1]),
                labels=tf.reshape(labels, [-1]))) + regularization
        elif self.loss_fuc == 'FTLR':
            loss = tf.reduce_mean(tf.reduce_sum(tf.square(labels - score),
                                                reduction_indices=[1])) + regularization
        # AUC
        auc = tf.metrics.auc(labels, score)
        print("%s: %s" % ("labels", labels))

    """ 6 Set up the optimizer """
    print("6 Set up the optimizer")
    with tf.name_scope("optimizer"):
        with tf.variable_scope("optimizer", reuse=tf.AUTO_REUSE):
            #------build optimizer------
            if self.train_optimizer == 'Adam':
                optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate,
                                                   beta1=0.9, beta2=0.999, epsilon=1e-8)
            elif self.train_optimizer == 'Adagrad':
                optimizer = tf.train.AdagradOptimizer(learning_rate=learning_rate,
                                                      initial_accumulator_value=1e-8)
            elif self.train_optimizer == 'Momentum':
                optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate, momentum=0.95)
            elif self.train_optimizer == 'ftrl':
                optimizer = tf.train.FtrlOptimizer(learning_rate)
            train_step = optimizer.minimize(loss, global_step=self.global_step)

    """ 7 Set up summaries for visualization in TensorBoard """
    print("7 Set up summaries")
    with tf.name_scope("summaries"):
        tf.summary.scalar("loss", loss)
        tf.summary.scalar("accumulate_auc", auc[0])
        tf.summary.histogram("FM_W", self.FM_W)
        tf.summary.histogram("FM_V", self.FM_V)
        for j in range(len(self.dnn_layer) + 1):
            with tf.variable_scope("deep_layer%d" % (j + 1), reuse=True):
                weights = tf.get_variable("weights")
                tf.summary.histogram("dnn_w_%d" % (j + 1), weights)
        # Several summaries, so merge_all here
        summary_op = tf.summary.merge_all()

    """ 8 Return the results """
    return Y_Out, score, regularization, loss, auc, train_step, labels, score, summary_op
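The train() method above delegates to a _udf_full_connect helper that is not shown. A plausible sketch of such a fully connected layer, with the variable name chosen to match the "weights" lookup used for regularization and summaries; this is an assumption, not the author's exact code.

def _udf_full_connect(self, inputs, input_size, output_size, activation='relu'):
    # Plain W*x + b layer; the variables live in the enclosing variable_scope
    # ("deep_layer%d"), which is what the regularization/summary code reuses.
    weights = tf.get_variable("weights", shape=[input_size, output_size],
                              initializer=tf.glorot_normal_initializer())
    biases = tf.get_variable("biases", shape=[output_size],
                             initializer=tf.constant_initializer(0.0))
    layer = tf.matmul(inputs, weights) + biases
    if activation == 'relu':
        layer = tf.nn.relu(layer)
    elif activation == 'tanh':
        layer = tf.nn.tanh(layer)
    return layer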
def model_fn(features, labels, mode, params):
    """Build model function f(x) for Estimator."""
    #------hyperparameters------
    field_size = params["field_size"]
    feature_size = params["feature_size"]
    embedding_size = params["embedding_size"]
    l2_reg = params["l2_reg"]
    learning_rate = params["learning_rate"]
    #batch_norm_decay = params["batch_norm_decay"]
    #optimizer = params["optimizer"]
    layers = list(map(int, params["deep_layers"].split(',')))    # list() so it can be indexed under Python 3
    dropout = list(map(float, params["dropout"].split(',')))

    #------build weights------
    FM_B = tf.get_variable(name='fm_bias', shape=[1], initializer=tf.constant_initializer(0.0))
    FM_W = tf.get_variable(name='fm_w', shape=[feature_size], initializer=tf.glorot_normal_initializer())
    FM_V = tf.get_variable(name='fm_v', shape=[feature_size, embedding_size], initializer=tf.glorot_normal_initializer())

    #------build features------
    feat_ids = features['feat_ids']
    feat_ids = tf.reshape(feat_ids, shape=[-1, field_size])
    feat_vals = features['feat_vals']
    feat_vals = tf.reshape(feat_vals, shape=[-1, field_size])

    #------build f(x)------
    with tf.variable_scope("First-order"):
        feat_wgts = tf.nn.embedding_lookup(FM_W, feat_ids)  # None * F * 1
        y_w = tf.reduce_sum(tf.multiply(feat_wgts, feat_vals), 1)

    with tf.variable_scope("Second-order"):
        embeddings = tf.nn.embedding_lookup(FM_V, feat_ids)  # None * F * K
        feat_vals = tf.reshape(feat_vals, shape=[-1, field_size, 1])
        embeddings = tf.multiply(embeddings, feat_vals)  # vij * xi
        sum_square = tf.square(tf.reduce_sum(embeddings, 1))
        square_sum = tf.reduce_sum(tf.square(embeddings), 1)
        y_v = 0.5 * tf.reduce_sum(tf.subtract(sum_square, square_sum), 1)  # None * 1

    with tf.variable_scope("Deep-part"):
        if FLAGS.batch_norm:
            #normalizer_fn = tf.contrib.layers.batch_norm
            #normalizer_fn = tf.layers.batch_normalization
            if mode == tf.estimator.ModeKeys.TRAIN:
                train_phase = True
                #normalizer_params = {'decay': batch_norm_decay, 'center': True, 'scale': True, 'updates_collections': None, 'is_training': True, 'reuse': None}
            else:
                train_phase = False
                #normalizer_params = {'decay': batch_norm_decay, 'center': True, 'scale': True, 'updates_collections': None, 'is_training': False, 'reuse': True}
        else:
            normalizer_fn = None
            normalizer_params = None

        deep_inputs = tf.reshape(embeddings, shape=[-1, field_size * embedding_size])  # None * (F*K)
        for i in range(len(layers)):
            #if FLAGS.batch_norm:
            #    deep_inputs = batch_norm_layer(deep_inputs, train_phase=train_phase, scope_bn='bn_%d' % i)
            #    #normalizer_params.update({'scope': 'bn_%d' % i})
            deep_inputs = tf.contrib.layers.fully_connected(
                inputs=deep_inputs, num_outputs=layers[i],
                #normalizer_fn=normalizer_fn, normalizer_params=normalizer_params,
                weights_regularizer=tf.contrib.layers.l2_regularizer(l2_reg), scope='mlp%d' % i)
            if FLAGS.batch_norm:
                # Apply BN after the ReLU, see https://github.com/ducha-aiki/caffenet-benchmark/blob/master/batchnorm.md#bn----before-or-after-relu
                deep_inputs = batch_norm_layer(deep_inputs, train_phase=train_phase, scope_bn='bn_%d' % i)
            if mode == tf.estimator.ModeKeys.TRAIN:
                # Apply dropout after all BN layers and set dropout=0.8 (drop_ratio=0.2).
                deep_inputs = tf.nn.dropout(deep_inputs, keep_prob=dropout[i])
                #deep_inputs = tf.layers.dropout(inputs=deep_inputs, rate=dropout[i], training=mode == tf.estimator.ModeKeys.TRAIN)

        y_deep = tf.contrib.layers.fully_connected(
            inputs=deep_inputs, num_outputs=1, activation_fn=tf.identity,
            weights_regularizer=tf.contrib.layers.l2_regularizer(l2_reg), scope='deep_out')
        y_d = tf.reshape(y_deep, shape=[-1])
        #sig_wgts = tf.get_variable(name='sigmoid_weights', shape=[layers[-1]], initializer=tf.glorot_normal_initializer())
        #sig_bias = tf.get_variable(name='sigmoid_bias', shape=[1], initializer=tf.constant_initializer(0.0))
        #deep_out = tf.nn.xw_plus_b(deep_inputs, sig_wgts, sig_bias, name='deep_out')

    with tf.variable_scope("DeepFM-out"):
        # Warning: do not use `labels` here (e.g. y_bias = FM_B * tf.ones_like(labels)); predict/export
        # would fail even though train/evaluate work, because the Estimator skips passing labels
        # when they are not needed.
        y_bias = FM_B * tf.ones_like(y_d, dtype=tf.float32)  # None * 1
        y = y_bias + y_w + y_v + y_d
        pred = tf.sigmoid(y)

    predictions = {"prob": pred}
    export_outputs = {
        tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY:
            tf.estimator.export.PredictOutput(predictions)}
    # Provide an estimator spec for `ModeKeys.PREDICT`
    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(
            mode=mode,
            predictions=predictions,
            export_outputs=export_outputs)

    #------build loss------
    loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=y, labels=labels)) + \
        l2_reg * tf.nn.l2_loss(FM_W) + \
        l2_reg * tf.nn.l2_loss(FM_V)  # + l2_reg * tf.nn.l2_loss(sig_wgts)

    # Provide an estimator spec for `ModeKeys.EVAL`
    eval_metric_ops = {
        "auc": tf.metrics.auc(labels, pred)
    }
    if mode == tf.estimator.ModeKeys.EVAL:
        return tf.estimator.EstimatorSpec(
            mode=mode,
            predictions=predictions,
            loss=loss,
            eval_metric_ops=eval_metric_ops)

    #------build optimizer------
    if FLAGS.optimizer == 'Adam':
        optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate, beta1=0.9, beta2=0.999, epsilon=1e-8)
    elif FLAGS.optimizer == 'Adagrad':
        optimizer = tf.train.AdagradOptimizer(learning_rate=learning_rate, initial_accumulator_value=1e-8)
    elif FLAGS.optimizer == 'Momentum':
        optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate, momentum=0.95)
    elif FLAGS.optimizer == 'ftrl':
        optimizer = tf.train.FtrlOptimizer(learning_rate)

    train_op = optimizer.minimize(loss, global_step=tf.train.get_global_step())

    # Provide an estimator spec for `ModeKeys.TRAIN`
    if mode == tf.estimator.ModeKeys.TRAIN:
        return tf.estimator.EstimatorSpec(
            mode=mode,
            predictions=predictions,
            loss=loss,
            train_op=train_op)
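The "Second-order" block relies on the FM identity: per embedding dimension, 0.5 * ((sum_i v_i)^2 - sum_i v_i^2) equals sum_{i<j} v_i * v_j, so the pairwise term costs O(F*K) instead of O(F^2*K). A quick NumPy check of that identity; the field and embedding sizes below are arbitrary toy values:

import numpy as np

emb = np.random.RandomState(0).randn(3, 4)   # F=3 fields, K=4 embedding dims

# Explicit pairwise formulation: sum over i<j of <v_i, v_j>.
pairwise = sum(emb[i].dot(emb[j]) for i in range(3) for j in range(i + 1, 3))

# Square-of-sum minus sum-of-squares trick, summed over K (as y_v does per example).
trick = 0.5 * (np.square(emb.sum(axis=0)) - np.square(emb).sum(axis=0)).sum()

assert np.allclose(pairwise, trick)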
def __init__(self, config):
    self.config = config
    self.N = config.N

    ######### not running out gpu sources ##########
    tf_config = tf.ConfigProto()
    tf_config.gpu_options.allow_growth = True
    self.sess = tf.Session(config=tf_config)

    ######### profiling #############################
    #self.options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
    #self.run_metadata = tf.RunMetadata()

    ############ define variables ##################
    self.W = {}
    self.b = {}
    self.scale = {}
    self.beta = {}
    self.pop_mean = {}
    self.pop_var = {}
    self.alpha = {}
    self.dn_vars = []

    # pre
    name_block = "pre"
    self.W[name_block + "3_l_0"] = tf.get_variable(
        name_block + "3_l_0",
        [3, 3, config.patch_size[2], config.pre_Nfeat],
        dtype=tf.float32,
        initializer=tf.glorot_normal_initializer())
    #self.create_bn_variables(name_block+"3_0", config.pre_Nfeat)
    self.W[name_block + "5_l_0"] = tf.get_variable(
        name_block + "5_l_0",
        [5, 5, config.patch_size[2], config.pre_Nfeat],
        dtype=tf.float32,
        initializer=tf.glorot_normal_initializer())
    #self.create_bn_variables(name_block+"5_0", config.pre_Nfeat)
    self.W[name_block + "7_l_0"] = tf.get_variable(
        name_block + "7_l_0",
        [7, 7, config.patch_size[2], config.pre_Nfeat],
        dtype=tf.float32,
        initializer=tf.glorot_normal_initializer())
    #self.create_bn_variables(name_block+"7_0", config.pre_Nfeat)
    self.dn_vars = self.dn_vars + [
        self.W[name_block + "3_l_0"], self.W[name_block + "5_l_0"],
        self.W[name_block + "7_l_0"]
    ]
    for i in range(1, config.pre_n_layers):
        self.W[name_block + "3_l_" + str(i)] = tf.get_variable(
            name_block + "3_l_" + str(i),
            [3, 3, config.pre_Nfeat, config.pre_Nfeat],
            dtype=tf.float32,
            initializer=tf.glorot_normal_initializer())
        #self.create_bn_variables(name_block+"3_"+str(i), config.pre_Nfeat)
        self.W[name_block + "5_l_" + str(i)] = tf.get_variable(
            name_block + "5_l_" + str(i),
            [5, 5, config.pre_Nfeat, config.pre_Nfeat],
            dtype=tf.float32,
            initializer=tf.glorot_normal_initializer())
        #self.create_bn_variables(name_block+"5_"+str(i), config.pre_Nfeat)
        self.W[name_block + "7_l_" + str(i)] = tf.get_variable(
            name_block + "7_l_" + str(i),
            [7, 7, config.pre_Nfeat, config.pre_Nfeat],
            dtype=tf.float32,
            initializer=tf.glorot_normal_initializer())
        #self.create_bn_variables(name_block+"7_"+str(i), config.pre_Nfeat)
        self.dn_vars = self.dn_vars + [
            self.W[name_block + "3_l_" + str(i)],
            self.W[name_block + "5_l_" + str(i)],
            self.W[name_block + "7_l_" + str(i)]
        ]

    # pregconv
    name_block = "pregconv"
    for i in range(config.pregconv_n_layers):
        self.create_gconv_variables(name_block + "3", i, config.pre_Nfeat,
                                    config.pre_fnet_Nfeat, config.pre_Nfeat,
                                    config.rank_theta, config.stride_pregconv,
                                    config.stride_pregconv)
        self.create_gconv_variables(name_block + "5", i, config.pre_Nfeat,
                                    config.pre_fnet_Nfeat, config.pre_Nfeat,
                                    config.rank_theta, config.stride_pregconv,
                                    config.stride_pregconv)
        self.create_gconv_variables(name_block + "7", i, config.pre_Nfeat,
                                    config.pre_fnet_Nfeat, config.pre_Nfeat,
                                    config.rank_theta, config.stride_pregconv,
                                    config.stride_pregconv)
        #self.create_bn_variables(name_block, config.Nfeat)

    # hpf
    name_block = "hpf"
    self.create_conv_variables(name_block, 0, config.Nfeat, config.Nfeat)
    self.create_bn_variables(name_block + "_c_" + "_" + str(0), config.Nfeat)
    for i in range(config.hpf_n_layers):
        self.create_gconv_variables(name_block, i, config.Nfeat,
                                    config.hpf_fnet_Nfeat, config.Nfeat,
                                    config.rank_theta, config.stride,
                                    config.stride)
        #self.create_bn_variables(name_block+"_"+str(i), config.Nfeat)

    # prox
    name_block = "prox"
    for i in range(config.prox_n_layers):
        self.create_conv_variables(name_block, i, config.Nfeat, config.Nfeat)
        self.create_bn_variables(name_block + "_c_" + "_" + str(i), config.Nfeat)
        for j in range(config.lpf_n_layers):
            self.create_gconv_variables(name_block + str(i), j, config.Nfeat,
                                        config.prox_fnet_Nfeat, config.Nfeat,
                                        config.rank_theta, config.stride,
                                        config.stride)
            self.create_bn_variables(name_block + str(i) + "_" + str(j), config.Nfeat)
        self.alpha["alpha_" + str(i)] = tf.get_variable(
            "alpha_" + str(i), [], dtype=tf.float32,
            initializer=tf.constant_initializer(0.5))
        self.beta["beta_" + str(i)] = tf.get_variable(
            "beta_" + str(i), [], dtype=tf.float32,
            initializer=tf.constant_initializer(0.5))
        self.dn_vars = self.dn_vars + [
            self.alpha["alpha_" + str(i)], self.beta["beta_" + str(i)]
        ]

    # last
    name_block = "last"
    self.create_gconv_variables(name_block, 0, config.Nfeat,
                                config.prox_fnet_Nfeat, config.patch_size[2],
                                config.rank_theta, config.stride,
                                config.patch_size[2])

    ############ define placeholders ##############
    self.x_clean = tf.placeholder("float", [
        None, config.patch_size[0], config.patch_size[1], config.patch_size[2]
    ], name="clean_image")
    self.x_noisy = tf.placeholder("float", [
        None, config.patch_size[0], config.patch_size[1], config.patch_size[2]
    ], name="noisy_image")
    self.is_training = tf.placeholder(tf.bool, (), name="is_training")
    self.local_mask = tf.placeholder("float", [config.searchN, ], name="local_mask")
    self.id_mat = 2 * tf.eye(config.searchN)

    ########### computational graph ###############
    self.__make_compute_graph()

    ################## losses #####################
    self.__make_loss()

    ################ optimizer ops ################
    #update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    #with tf.control_dependencies(update_ops):
    #global_step = tf.Variable(0, trainable=False)
    #l_r = tf.train.exponential_decay(config.starter_learning_rate, global_step, config.decay_step, config.decay_rate, staircase=True)
    #self.opt = tf.train.AdamOptimizer(l_r)
    # create a copy of all trainable variables with `0` as initial values
    #self.accum_vars = [tf.Variable(tf.zeros_like(t_var.initialized_value()), trainable=False) for t_var in dn_vars]
    # create an op to initialize all accumulator vars
    #self.zero_accum_vars = [tv.assign(tf.zeros_like(tv)) for tv in self.accum_vars]
    # compute gradients for a batch
    #batch_grads_vars = self.opt.compute_gradients(self.loss, dn_vars)
    # collect the batch gradients into the accumulator vars
    #self.accum_op = self.my_accum_op(batch_grads_vars)
    #self.accum_op = [self.accum_vars[i].assign_add(batch_grad_var[0]) if batch_grad_var[0] is not None else self.accum_vars[i].assign_add(tf.zeros_like(self.accum_vars[i])) for i, batch_grad_var in enumerate(batch_grads_vars)]
    # apply accumulated gradients
    #print([(self.accum_vars[i], batch_grad_var[1]) for i, batch_grad_var in enumerate(batch_grads_vars)])
    #print(batch_grads_vars)
    #grad_and_vars_final = [(self.accum_vars[i], batch_grad_var[1]) if batch_grad_var[0] is not None else (None, batch_grad_var[1]) for i, batch_grad_var in enumerate(batch_grads_vars)]
    #self.apply_accum = self.opt.apply_gradients(grad_and_vars_final)
    #self.apply_accum = self.opt.apply_gradients(batch_grads_vars)
    self.opt = tf.train.AdamOptimizer(config.end_learning_rate).minimize(
        self.loss,
        var_list=self.dn_vars,
        aggregation_method=tf.AggregationMethod.EXPERIMENTAL_TREE)

    ################# summaries ###################
    tf.summary.scalar('loss', self.loss)
    tf.summary.scalar('PSNR', self.psnr)
    tf.summary.image('denoised_image', tf.expand_dims(self.x_hat[0, :, :, :], 0))
    tf.summary.image('noisy_image', tf.expand_dims(self.x_noisy[0, :, :, :], 0))
    tf.summary.image('clean_image', tf.expand_dims(self.x_clean[0, :, :, :], 0))
    self.summaries = tf.summary.merge_all()

    # Check if log_dir exists, if so delete contents
    #if tf.gfile.Exists(self.config.log_dir):
    #    tf.gfile.DeleteRecursively(self.config.log_dir)
    #    tf.gfile.MkDir(self.config.log_dir+'train/')
    #    tf.gfile.MkDir(self.config.log_dir+'val/')
    self.train_summaries_writer = tf.summary.FileWriter(
        self.config.log_dir + 'train/', self.sess.graph)
    self.val_summaries_writer = tf.summary.FileWriter(
        self.config.log_dir + 'val/', self.sess.graph)
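The Adam step above restricts updates to the variables collected in self.dn_vars via var_list. A minimal sketch of that mechanism, assuming TF 1.x graph mode; the toy variables a and b are illustrative and unrelated to this model:

import tensorflow as tf

a = tf.get_variable("a", initializer=1.0)
b = tf.get_variable("b", initializer=1.0)
loss = tf.square(a) + tf.square(b)

# Only `a` appears in var_list, so only `a` receives gradient updates.
step = tf.train.AdamOptimizer(0.1).minimize(loss, var_list=[a])

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(step)
    print(sess.run([a, b]))   # a has moved away from 1.0, b is unchanged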
def graphing(self, features, labels, mode):
    self.logger.info('mode = {}'.format(mode))
    p = self.hparam
    self.features, self.labels = features, labels
    for name, tensor in self.features.items():
        setattr(self, name, tensor)

    with tf.variable_scope("init") as scope:
        init_fn = tf.glorot_normal_initializer()
        emb_init_fn = tf.glorot_uniform_initializer()
        self.b_global = tf.Variable(emb_init_fn(shape=[]), name="b_global")

    with tf.variable_scope("embedding") as scope:
        self.w_query_movie_ids = tf.Variable(
            emb_init_fn(shape=[self.n_items, p.dim]), name="w_query_movie_ids")
        self.b_query_movie_ids = tf.Variable(
            emb_init_fn(shape=[p.dim]), name="b_query_movie_ids")
        self.w_candidate_movie_id = tf.Variable(
            init_fn(shape=[self.n_items, p.dim]), name="w_candidate_movie_id")
        self.b_candidate_movie_id = tf.Variable(
            init_fn(shape=[p.dim + 8 + 2]), name="b_candidate_movie_id")
        # self.b_candidate_movie_id = tf.Variable(init_fn(shape=[self.n_items]), name="b_candidate_movie_id")
        self.w_genres = tf.Variable(
            emb_init_fn(shape=[self.n_genres, 8]), name="w_genres")

    with tf.variable_scope("user_encoding") as scope:
        # query_movie embedding
        self.emb_query = tf.nn.embedding_lookup(self.w_query_movie_ids,
                                                self.query_movie_ids)
        query_movie_mask = tf.expand_dims(
            tf.nn.l2_normalize(
                tf.to_float(tf.sequence_mask(self.query_movie_ids_len)), 1), -1)
        self.emb_query = tf.reduce_sum(self.emb_query * query_movie_mask, 1)
        self.query_bias = tf.matmul(self.emb_query,
                                    self.b_query_movie_ids[:, tf.newaxis])
        self.emb_query = tf.layers.dense(self.emb_query, 128, kernel_initializer=init_fn, activation=tf.nn.selu)
        self.emb_query = tf.layers.dense(self.emb_query, 64, kernel_initializer=init_fn, activation=tf.nn.selu)
        self.emb_query = tf.layers.dense(self.emb_query, 32, kernel_initializer=init_fn, activation=tf.nn.selu)
        # self.emb_query = tf.layers.dense(self.emb_query, 16, kernel_initializer=init_fn, activation=tf.nn.selu)

    # encode [item embedding + item metadata]
    with tf.variable_scope("item_encoding") as scope:
        # candidate_movie embedding
        self.candidate_emb = tf.nn.embedding_lookup(
            self.w_candidate_movie_id, self.candidate_movie_id)
        # genres embedding
        self.emb_genres = tf.nn.embedding_lookup(self.w_genres,
                                                 tf.to_int32(self.genres))
        genres_mask = tf.expand_dims(
            tf.nn.l2_normalize(
                tf.to_float(tf.sequence_mask(self.genres_len)), 1), -1)
        self.emb_genres = tf.reduce_sum(self.emb_genres * genres_mask, 1)
        self.emb_item = tf.concat([
            self.candidate_emb, self.emb_genres,
            self.avg_rating[:, tf.newaxis], self.year[:, tf.newaxis]
        ], 1)
        self.candidate_bias = tf.matmul(
            self.emb_item, self.b_candidate_movie_id[:, tf.newaxis])
        self.emb_item = tf.layers.dense(self.emb_item, 128, kernel_initializer=init_fn, activation=tf.nn.selu)
        self.emb_item = tf.layers.dense(self.emb_item, 64, kernel_initializer=init_fn, activation=tf.nn.selu)
        self.emb_item = tf.layers.dense(self.emb_item, 32, kernel_initializer=init_fn, activation=tf.nn.selu)
        # self.emb_item = tf.layers.dense(self.emb_item, 16, kernel_initializer=init_fn, activation=tf.nn.selu)

    # element-wise dot of the user and item embeddings
    with tf.variable_scope("gmf") as scope:
        self.gmf = tf.reduce_sum(self.emb_query * self.emb_item, 1, keep_dims=True)
        self.gmf = tf.add(self.gmf, self.b_global)
        self.gmf = tf.add(self.gmf, self.query_bias)
        self.gmf = tf.add(self.gmf, self.candidate_bias)
        self.infer = tf.nn.sigmoid(self.gmf, name="infer")
        # one query scored against all items, for prediction speed
        self.pred = tf.matmul(self.emb_query, tf.transpose(self.emb_item)) + \
                    tf.reshape(self.candidate_bias, (1, -1)) + \
                    self.query_bias + \
                    self.b_global
        self.pred = tf.nn.sigmoid(self.pred, name='pred')

    # Provide an estimator spec for `ModeKeys.PREDICT`
    if mode == tf.estimator.ModeKeys.PREDICT:
        export_outputs = {
            'outputs': tf.estimator.export.PredictOutput({
                'emb_query': self.emb_query,
                'emb_item': self.emb_item,
                'predictions': self.infer
            })
        }
        return tf.estimator.EstimatorSpec(mode=mode,
                                          predictions=self.pred,
                                          export_outputs=export_outputs)

    with tf.variable_scope("loss") as scope:
        # self.alter_rating = tf.to_float(self.label >= 4)[:, tf.newaxis]
        self.ans = tf.to_float(self.labels)[:, tf.newaxis]
        self.loss = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(labels=self.ans,
                                                    logits=self.gmf))
        tf.summary.scalar('loss', self.loss)

    with tf.variable_scope("metrics") as scope:
        self.auc = tf.metrics.auc(tf.cast(self.labels, tf.bool),
                                  tf.reshape(self.infer, [-1]))
        # tf.summary.scalar('auc', self.auc)

    self.train_op = None
    self.global_step = tf.train.get_or_create_global_step()
    if mode == tf.estimator.ModeKeys.TRAIN:
        with tf.variable_scope("train"):
            update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
            with tf.control_dependencies(update_ops):
                self.train_op = tf.train.AdamOptimizer().minimize(
                    self.loss, self.global_step)
                # self.train_op = tf.train.GradientDescentOptimizer(learning_rate).minimize(self.loss)

    # self.merge = tf.summary.merge_all()
    return tf.estimator.EstimatorSpec(mode=mode,
                                      loss=self.loss,
                                      train_op=self.train_op,
                                      eval_metric_ops={'auc': self.auc},
                                      evaluation_hooks=[])
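In the "gmf" scope one (query, item) pair is scored with an element-wise product summed over the embedding dimension, while self.pred scores a query against every candidate item with a single matmul. A small NumPy check that the two formulations agree; the shapes below are toy values, not the model's real dimensions:

import numpy as np

rng = np.random.RandomState(0)
emb_query = rng.randn(2, 32)   # 2 queries, 32-dim encodings
emb_item = rng.randn(5, 32)    # 5 candidate items

# Per-pair score, as in the "gmf" scope (bias terms omitted here).
pair_score = (emb_query[0] * emb_item[3]).sum()

# All-items scores, as in `self.pred`: one matmul covers every (query, item) pair.
all_scores = emb_query.dot(emb_item.T)       # shape (2, 5)

assert np.allclose(pair_score, all_scores[0, 3])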
def model_fn(features, labels, mode, params):
    """Build model function f(x) for Estimator."""
    #------hyperparameters------
    field_size = params["field_size"]
    feature_size = params["feature_size"]
    embedding_size = params["embedding_size"]
    l2_reg = params["l2_reg"]
    learning_rate = params["learning_rate"]
    #optimizer = params["optimizer"]
    layers = list(map(int, params["attention_layers"].split(',')))  # list() so it can be indexed under Python 3
    dropout = list(map(float, params["dropout"].split(',')))

    #------build weights------
    Global_Bias = tf.get_variable(name='bias', shape=[1], initializer=tf.constant_initializer(0.0))
    Feat_Bias = tf.get_variable(name='linear', shape=[feature_size], initializer=tf.glorot_normal_initializer())
    Feat_Emb = tf.get_variable(name='emb', shape=[feature_size, embedding_size], initializer=tf.glorot_normal_initializer())

    #------build features------
    feat_ids = features['feat_ids']
    feat_ids = tf.reshape(feat_ids, shape=[-1, field_size])
    feat_vals = features['feat_vals']
    feat_vals = tf.reshape(feat_vals, shape=[-1, field_size])

    #------build f(x)------
    with tf.variable_scope("Linear-part"):
        feat_wgts = tf.nn.embedding_lookup(Feat_Bias, feat_ids)  # None * F * 1
        y_linear = tf.reduce_sum(tf.multiply(feat_wgts, feat_vals), 1)

    with tf.variable_scope("Pairwise-Interaction-Layer"):
        embeddings = tf.nn.embedding_lookup(Feat_Emb, feat_ids)  # None * F * K
        feat_vals = tf.reshape(feat_vals, shape=[-1, field_size, 1])
        embeddings = tf.multiply(embeddings, feat_vals)  # vij * xi

        num_interactions = field_size * (field_size - 1) // 2  # integer division: used as a shape below
        element_wise_product_list = []
        for i in range(0, field_size):
            for j in range(i + 1, field_size):
                element_wise_product_list.append(tf.multiply(embeddings[:, i, :], embeddings[:, j, :]))
        element_wise_product = tf.stack(element_wise_product_list)  # (F*(F-1)/2) * None * K
        element_wise_product = tf.transpose(element_wise_product, perm=[1, 0, 2])  # None * (F*(F-1)/2) * K
        #interactions = tf.reduce_sum(element_wise_product, 2, name="interactions")

    with tf.variable_scope("Attention-part"):
        deep_inputs = tf.reshape(element_wise_product, shape=[-1, embedding_size])  # (None*F*(F-1)/2) * K
        for i in range(len(layers)):
            deep_inputs = tf.contrib.layers.fully_connected(
                inputs=deep_inputs, num_outputs=layers[i],
                weights_regularizer=tf.contrib.layers.l2_regularizer(l2_reg), scope='mlp%d' % i)

        aij = tf.contrib.layers.fully_connected(
            inputs=deep_inputs, num_outputs=1, activation_fn=tf.identity,
            weights_regularizer=tf.contrib.layers.l2_regularizer(l2_reg),
            scope='attention_out')  # (None*F*(F-1)/2) * 1

        #aij_reshape = tf.reshape(aij, shape=[-1, num_interactions, 1])  # None * (F*(F-1)/2) * 1
        aij_softmax = tf.nn.softmax(tf.reshape(aij, shape=[-1, num_interactions, 1]), dim=1, name='attention_soft')
        if mode == tf.estimator.ModeKeys.TRAIN:
            aij_softmax = tf.nn.dropout(aij_softmax, keep_prob=dropout[0])

    with tf.variable_scope("Attention-based-Pooling"):
        y_emb = tf.reduce_sum(tf.multiply(aij_softmax, element_wise_product), 1)  # None * K
        if mode == tf.estimator.ModeKeys.TRAIN:
            y_emb = tf.nn.dropout(y_emb, keep_prob=dropout[1])

        y_d = tf.contrib.layers.fully_connected(
            inputs=y_emb, num_outputs=1, activation_fn=tf.identity,
            weights_regularizer=tf.contrib.layers.l2_regularizer(l2_reg), scope='deep_out')  # None * 1
        y_deep = tf.reshape(y_d, shape=[-1])

    with tf.variable_scope("AFM-out"):
        # Warning: do not use `labels` here (e.g. y_bias = Global_Bias * tf.ones_like(labels)); predict/export
        # would fail even though train/evaluate work, because the Estimator skips passing labels
        # when they are not needed.
        y_bias = Global_Bias * tf.ones_like(y_deep, dtype=tf.float32)  # None * 1
        y = y_bias + y_linear + y_deep
        pred = tf.sigmoid(y)

    predictions = {"prob": pred}
    export_outputs = {
        tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY:
            tf.estimator.export.PredictOutput(predictions)}
    # Provide an estimator spec for `ModeKeys.PREDICT`
    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(
            mode=mode,
            predictions=predictions,
            export_outputs=export_outputs)

    #------build loss------
    loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=y, labels=labels)) + \
        l2_reg * tf.nn.l2_loss(Feat_Bias) + l2_reg * tf.nn.l2_loss(Feat_Emb)

    # Provide an estimator spec for `ModeKeys.EVAL`
    eval_metric_ops = {
        "auc": tf.metrics.auc(labels, pred)
    }
    if mode == tf.estimator.ModeKeys.EVAL:
        return tf.estimator.EstimatorSpec(
            mode=mode,
            predictions=predictions,
            loss=loss,
            eval_metric_ops=eval_metric_ops)

    #------build optimizer------
    if FLAGS.optimizer == 'Adam':
        optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate, beta1=0.9, beta2=0.999, epsilon=1e-8)
    elif FLAGS.optimizer == 'Adagrad':
        optimizer = tf.train.AdagradOptimizer(learning_rate=learning_rate, initial_accumulator_value=1e-8)
    elif FLAGS.optimizer == 'Momentum':
        optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate, momentum=0.95)
    elif FLAGS.optimizer == 'ftrl':
        optimizer = tf.train.FtrlOptimizer(learning_rate)

    train_op = optimizer.minimize(loss, global_step=tf.train.get_global_step())

    # Provide an estimator spec for `ModeKeys.TRAIN`
    if mode == tf.estimator.ModeKeys.TRAIN:
        return tf.estimator.EstimatorSpec(
            mode=mode,
            predictions=predictions,
            loss=loss,
            train_op=train_op)
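aij_softmax normalizes the attention logits over the interaction axis, and the pooling step then collapses the F*(F-1)/2 pairwise vectors into a single K-dimensional vector per example. A NumPy sketch of that shape flow with made-up sizes (F=4, K=4), not the real hyperparameters:

import numpy as np

rng = np.random.RandomState(0)
num_interactions, K = 6, 4                                  # F=4 fields -> 4*3/2 = 6 pairs
element_wise_product = rng.randn(1, num_interactions, K)    # None * (F*(F-1)/2) * K
aij = rng.randn(1, num_interactions, 1)                     # raw attention logits

# Softmax over the interaction axis (dim=1 above), then the attention-weighted sum.
attention = np.exp(aij) / np.exp(aij).sum(axis=1, keepdims=True)
y_emb = (attention * element_wise_product).sum(axis=1)      # None * K

print(y_emb.shape)            # (1, 4)
print(attention.sum(axis=1))  # each example's attention weights sum to 1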
def model_fn(features, labels, mode, params):
    """Build model function f(x) for Estimator."""
    #------hyperparameters------
    field_size = params["field_size"]
    feature_size = params["feature_size"]
    embedding_size = params["embedding_size"]
    l2_reg = params["l2_reg"]
    learning_rate = params["learning_rate"]
    #optimizer = params["optimizer"]
    layers = list(map(int, params["deep_layers"].split(',')))  # list() so it can be indexed under Python 3
    dropout = list(map(float, params["dropout"].split(',')))

    #------build weights------
    Global_Bias = tf.get_variable(name='bias', shape=[1], initializer=tf.constant_initializer(0.0))
    Feat_Bias = tf.get_variable(name='linear', shape=[feature_size], initializer=tf.glorot_normal_initializer())
    Feat_Emb = tf.get_variable(name='emb', shape=[feature_size, embedding_size], initializer=tf.glorot_normal_initializer())

    #------build features------
    feat_ids = features['feat_ids']
    feat_ids = tf.reshape(feat_ids, shape=[-1, field_size])
    feat_vals = features['feat_vals']
    feat_vals = tf.reshape(feat_vals, shape=[-1, field_size])

    #------build f(x)------
    with tf.variable_scope("Linear-part"):
        feat_wgts = tf.nn.embedding_lookup(Feat_Bias, feat_ids)  # None * F * 1
        y_linear = tf.reduce_sum(tf.multiply(feat_wgts, feat_vals), 1)

    with tf.variable_scope("BiInter-part"):
        embeddings = tf.nn.embedding_lookup(Feat_Emb, feat_ids)  # None * F * K
        feat_vals = tf.reshape(feat_vals, shape=[-1, field_size, 1])
        embeddings = tf.multiply(embeddings, feat_vals)  # vij * xi
        sum_square_emb = tf.square(tf.reduce_sum(embeddings, 1))
        square_sum_emb = tf.reduce_sum(tf.square(embeddings), 1)
        deep_inputs = 0.5 * tf.subtract(sum_square_emb, square_sum_emb)  # None * K

    with tf.variable_scope("Deep-part"):
        if mode == tf.estimator.ModeKeys.TRAIN:
            train_phase = True
        else:
            train_phase = False

        if mode == tf.estimator.ModeKeys.TRAIN:
            deep_inputs = tf.nn.dropout(deep_inputs, keep_prob=dropout[0])  # None * K
        for i in range(len(layers)):
            deep_inputs = tf.contrib.layers.fully_connected(
                inputs=deep_inputs, num_outputs=layers[i],
                weights_regularizer=tf.contrib.layers.l2_regularizer(l2_reg), scope='mlp%d' % i)
            if FLAGS.batch_norm:
                # Apply BN after the ReLU, see https://github.com/ducha-aiki/caffenet-benchmark/blob/master/batchnorm.md#bn----before-or-after-relu
                deep_inputs = batch_norm_layer(deep_inputs, train_phase=train_phase, scope_bn='bn_%d' % i)
            if mode == tf.estimator.ModeKeys.TRAIN:
                # Apply dropout after all BN layers and set dropout=0.8 (drop_ratio=0.2).
                deep_inputs = tf.nn.dropout(deep_inputs, keep_prob=dropout[i])
                #deep_inputs = tf.layers.dropout(inputs=deep_inputs, rate=dropout[i], training=mode == tf.estimator.ModeKeys.TRAIN)

        y_deep = tf.contrib.layers.fully_connected(
            inputs=deep_inputs, num_outputs=1, activation_fn=tf.identity,
            weights_regularizer=tf.contrib.layers.l2_regularizer(l2_reg), scope='deep_out')
        y_d = tf.reshape(y_deep, shape=[-1])

    with tf.variable_scope("NFM-out"):
        # Warning: do not use `labels` here (e.g. y_bias = Global_Bias * tf.ones_like(labels)); predict/export
        # would fail even though train/evaluate work, because the Estimator skips passing labels
        # when they are not needed.
        y_bias = Global_Bias * tf.ones_like(y_d, dtype=tf.float32)  # None * 1
        y = y_bias + y_linear + y_d
        pred = tf.sigmoid(y)

    predictions = {"prob": pred}
    export_outputs = {
        tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY:
            tf.estimator.export.PredictOutput(predictions)}
    # Provide an estimator spec for `ModeKeys.PREDICT`
    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(
            mode=mode,
            predictions=predictions,
            export_outputs=export_outputs)

    #------build loss------
    loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=y, labels=labels)) + \
        l2_reg * tf.nn.l2_loss(Feat_Bias) + l2_reg * tf.nn.l2_loss(Feat_Emb)

    # Provide an estimator spec for `ModeKeys.EVAL`
    eval_metric_ops = {
        "auc": tf.metrics.auc(labels, pred)
    }
    if mode == tf.estimator.ModeKeys.EVAL:
        return tf.estimator.EstimatorSpec(
            mode=mode,
            predictions=predictions,
            loss=loss,
            eval_metric_ops=eval_metric_ops)

    #------build optimizer------
    if FLAGS.optimizer == 'Adam':
        optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate, beta1=0.9, beta2=0.999, epsilon=1e-8)
    elif FLAGS.optimizer == 'Adagrad':
        optimizer = tf.train.AdagradOptimizer(learning_rate=learning_rate, initial_accumulator_value=1e-8)
    elif FLAGS.optimizer == 'Momentum':
        optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate, momentum=0.95)
    elif FLAGS.optimizer == 'ftrl':
        optimizer = tf.train.FtrlOptimizer(learning_rate)

    train_op = optimizer.minimize(loss, global_step=tf.train.get_global_step())

    # Provide an estimator spec for `ModeKeys.TRAIN`
    if mode == tf.estimator.ModeKeys.TRAIN:
        return tf.estimator.EstimatorSpec(
            mode=mode,
            predictions=predictions,
            loss=loss,
            train_op=train_op)
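For context, a model_fn like the one above is normally handed to tf.estimator.Estimator. The sketch below shows that wiring with hypothetical hyperparameter values; it assumes the surrounding script also defines FLAGS, batch_norm_layer and an input_fn, none of which are shown here:

import tensorflow as tf

# Illustrative values only; the keys match what model_fn reads from `params`.
params = {
    "field_size": 39,
    "feature_size": 117581,
    "embedding_size": 32,
    "l2_reg": 0.0001,
    "learning_rate": 0.0005,
    "deep_layers": "64,32",
    "dropout": "0.8,0.8",
}

estimator = tf.estimator.Estimator(
    model_fn=model_fn,
    model_dir="./model_nfm",
    params=params)

# estimator.train(input_fn=lambda: input_fn(train_files))   # input_fn not shown here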