def test_op(self, batch_parsed_features):
    embedding_list = []
    user_long_embedding_size = 0
    combiner = 'mean'
    for field_name in self.sparse_field_name_list:
        # Embedding layer for each sparse field.
        with tf.variable_scope(field_name + "_embedding_layer",
                               reuse=tf.AUTO_REUSE):
            field_sparse_ids = batch_parsed_features[field_name]
            field_sparse_values = batch_parsed_features[field_name + "_values"]
            if combiner == 'mean':
                embedding = tf.nn.embedding_lookup_sparse(
                    self.tag_embedding_weight, field_sparse_ids,
                    field_sparse_values, combiner="mean")
            elif combiner == 'avg':
                # Manual mean: sum the embeddings, then divide by the
                # per-row count of active features.
                embedding = tf.nn.embedding_lookup_sparse(
                    self.tag_embedding_weight, field_sparse_ids,
                    field_sparse_values, combiner="sum")
                sparse_features = tf.sparse_merge(
                    field_sparse_ids, field_sparse_values,
                    vocab_size=self.label_size)
                sparse_x_feature_cnt = tf.sparse_tensor_dense_matmul(
                    sparse_features, self.tag_ones)
                embedding = tf.div(embedding, sparse_x_feature_cnt)
            embedding_list.append(embedding)
            user_long_embedding_size += self.embedding_dim
    for i, field_name in enumerate(self.dense_field_name_list):
        with tf.variable_scope(field_name + "_dense_layer",
                               reuse=tf.AUTO_REUSE):
            field_dense_feature_values = tf.decode_raw(
                batch_parsed_features[field_name], tf.float32)
            embedding_list.append(field_dense_feature_values)
            user_long_embedding_size += self.dense_field_size[i]
    user_long_embedding = tf.concat(embedding_list, 1)
    print(str(user_long_embedding_size))
    return user_long_embedding
def decode_example(protos, vocab_size):
    # TODO
    features = {
        # one user (or one item)
        'key': tf.FixedLenFeature([1], tf.int64),
        # multiple items (or users) that the user (item) has interacted with
        'indices': tf.VarLenFeature(dtype=tf.int64),
        # ratings of these items (or users) in the above interactions
        'values': tf.VarLenFeature(dtype=tf.float32)
    }
    # Read TF-Record file examples in the format given above.
    parsed_features = tf.parse_single_example(protos, features)
    # Convert into the sparse tensor needed by WALS.
    values = tf.sparse_merge(parsed_features['indices'],
                             parsed_features['values'],
                             vocab_size=vocab_size)
    # Save the key to remap after batching.
    # This is a temporary workaround to assign correct row numbers in each
    # batch. You can ignore the details of this part and remap_keys().
    key = parsed_features['key']
    decoded_sparse_tensor = tf.SparseTensor(
        indices=tf.concat([values.indices, [key]], axis=0),
        values=tf.concat([values.values, [0.0]], axis=0),
        dense_shape=values.dense_shape)
    return decoded_sparse_tensor
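All of the decode_example variants in this collection hinge on the same tf.sparse_merge behaviour: given a SparseTensor of feature ids and a SparseTensor of matching values, it produces a SparseTensor whose last dimension is vocab_size, with the ids becoming column coordinates. A minimal standalone sketch (TF 1.x graph mode; the ids, weights, and vocab_size below are made up):

import tensorflow as tf

sp_ids = tf.SparseTensor(indices=[[0, 0], [0, 1], [1, 0]],
                         values=tf.constant([2, 5, 1], dtype=tf.int64),
                         dense_shape=[2, 2])
sp_values = tf.SparseTensor(indices=[[0, 0], [0, 1], [1, 0]],
                            values=[0.5, 1.0, 2.0],  # weight for each id
                            dense_shape=[2, 2])
merged = tf.sparse_merge(sp_ids, sp_values, vocab_size=6)

with tf.Session() as sess:
    print(sess.run(tf.sparse_tensor_to_dense(merged)))
    # [[0.  0.  0.5 0.  0.  1. ]
    #  [0.  2.  0.  0.  0.  0. ]]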
def make_example_dict(example_protos, example_weights):

    def parse_examples(example_protos):
        features = {
            'target': tf.FixedLenFeature(shape=[1], dtype=tf.float32,
                                         default_value=0),
            'age_indices': tf.VarLenFeature(dtype=tf.int64),
            'age_values': tf.VarLenFeature(dtype=tf.float32),
            'gender_indices': tf.VarLenFeature(dtype=tf.int64),
            'gender_values': tf.VarLenFeature(dtype=tf.float32)
        }
        return tf.parse_example(
            [e.SerializeToString() for e in example_protos], features)

    sparse_merge = lambda ids, values: tf.sparse_merge(ids, values, ids.shape[1])
    parsed = parse_examples(example_protos)
    sparse_features = [
        sparse_merge(parsed['age_indices'], parsed['age_values']),
        sparse_merge(parsed['gender_indices'], parsed['gender_values'])
    ]
    return dict(sparse_features=sparse_features,
                dense_features=[],
                example_weights=example_weights,
                example_labels=tf.reshape(parsed['target'], [-1]),
                example_ids=['%d' % i for i in xrange(0, len(example_protos))])
def train_op(self, batch_parsed_features):
    batch_labels = tf.sparse_tensor_to_dense(
        tf.sparse_merge(batch_parsed_features["label"],
                        batch_parsed_features["label_values"],
                        self.label_size))
    user_nn_layer_output = self.get_user_embedding_list(
        batch_parsed_features, need_dropout=self.need_dropout)
    logits = tf.matmul(user_nn_layer_output,
                       tf.transpose(self.tag_embedding_weight))
    logits = tf.nn.bias_add(logits, self.tag_embedding_biases)
    # Training loss.
    cross_entropy = tf.nn.sigmoid_cross_entropy_with_logits(
        labels=batch_labels, logits=logits)
    # cross_entropy = tf.losses.sigmoid_cross_entropy(
    #     logits=logits, multi_class_labels=batch_labels)
    loss = tf.reduce_mean(cross_entropy)
    global_step = tf.Variable(0, name="global_step", trainable=False)
    train_step = tf.train.AdamOptimizer(self.learning_rate).minimize(
        loss, global_step=global_step)
    tf.summary.scalar('loss', loss)
    # Accuracy.
    predictions = tf.nn.sigmoid(logits, name='prediction')
    # tf.round rounds each element to the nearest integer; tf.equal then
    # compares the rounded predictions with the labels element-wise and
    # returns a boolean tensor of the same shape.
    correct_prediction = tf.equal(tf.round(predictions), batch_labels)
    # tf.cast converts the booleans to floats so that tf.reduce_mean can
    # average them into an accuracy.
    accuracy = tf.cast(correct_prediction, tf.float32)
    mean_accuracy = tf.reduce_mean(accuracy)
    tf.summary.scalar('mean_accuracy', mean_accuracy)
    # mean_average_precision = tf.metrics.average_precision_at_k(
    #     tf.cast(batch_labels, tf.int64), predictions, 100)
    # tf.summary.scalar('mean_average_precision', mean_average_precision[0])
    return train_step, global_step, loss
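The batch_labels construction above turns per-example label ids into a dense multi-hot matrix. A minimal sketch of just that step, assuming labels arrive as the parallel label / label_values SparseTensors used above (the ids, weights, and label_size=4 are made up):

import tensorflow as tf

label_ids = tf.SparseTensor(indices=[[0, 0], [1, 0], [1, 1]],
                            values=tf.constant([3, 0, 2], dtype=tf.int64),
                            dense_shape=[2, 2])
label_vals = tf.SparseTensor(indices=[[0, 0], [1, 0], [1, 1]],
                             values=[1.0, 1.0, 1.0],
                             dense_shape=[2, 2])
batch_labels = tf.sparse_tensor_to_dense(
    tf.sparse_merge(label_ids, label_vals, vocab_size=4))

with tf.Session() as sess:
    print(sess.run(batch_labels))
    # [[0. 0. 0. 1.]
    #  [1. 0. 1. 0.]]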
def decode_example(protos, vocab_size):
    features = {'key': tf.FixedLenFeature([1], tf.int64),
                'indices': tf.VarLenFeature(dtype=tf.int64),
                'values': tf.VarLenFeature(dtype=tf.float32)}
    parsed_features = tf.parse_single_example(protos, features)
    keys = parsed_features['key']
    values = tf.sparse_merge(parsed_features['indices'],
                             parsed_features['values'],
                             vocab_size=vocab_size)
    return values
def train_op(self, batch_parsed_features):
    global_steps = tf.Variable(0, trainable=False)
    batch_labels = tf.reshape(tf.to_float(batch_parsed_features["label"]), [-1])
    sparse_ids = batch_parsed_features["indices"]
    sparse_values = batch_parsed_features["values"]
    sparse_features = tf.sparse_merge(sparse_ids, sparse_values,
                                      vocab_size=self.feature_size)
    sparse_values_square = tf.SparseTensor(sparse_values.indices,
                                           tf.pow(sparse_values.values, 2),
                                           sparse_values.dense_shape)
    sparse_features_square = tf.sparse_merge(sparse_ids, sparse_values_square,
                                             vocab_size=self.feature_size)
    # LR (linear) output.
    lr_layer = layers.get_lr_layer(sparse_features, self.lr_weights_shape,
                                   self.lr_biases_shape)
    # FM (factorization-machine) output.
    fm_layer = layers.get_fm_layer(sparse_features, sparse_features_square,
                                   self.fm_weights_shape, self.embedding_dim)
    # Field-embedding output.
    field_embeddings, input_size = self.get_field_embeddings(sparse_features)
    nn_layer = layers.get_nn_layer(field_embeddings, input_size,
                                   self.nn_layer_shape)
    field_deepfm_output = tf.reshape(lr_layer + fm_layer + nn_layer, [-1])
    cross_entropy = tf.nn.sigmoid_cross_entropy_with_logits(
        labels=batch_labels, logits=field_deepfm_output)
    # Training loss.
    loss = tf.reduce_mean(cross_entropy)
    train_step = tf.train.AdamOptimizer(self.learning_rate).minimize(
        loss, global_step=global_steps)
    # Training AUC.
    field_deepfm_output_sigmoid = tf.sigmoid(field_deepfm_output)
    auc = tf.metrics.auc(batch_labels, field_deepfm_output_sigmoid)
    tf.summary.scalar('auc1', auc[0])
    tf.summary.scalar('auc2', auc[1])
    tf.summary.scalar('loss', loss)
    return train_step, loss, global_steps, auc
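The sparse_values_square construction above shows a useful pattern: keep the indices and dense_shape of a SparseTensor and replace only its values. A minimal standalone sketch (the toy tensor and shapes are made up):

import tensorflow as tf

sparse_values = tf.SparseTensor(indices=[[0, 0], [1, 1]],
                                values=[2.0, 3.0],
                                dense_shape=[2, 2])
# Same sparsity pattern, element-wise squared values.
sparse_values_square = tf.SparseTensor(sparse_values.indices,
                                       tf.pow(sparse_values.values, 2),
                                       sparse_values.dense_shape)

with tf.Session() as sess:
    print(sess.run(sparse_values_square.values))  # [4. 9.]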
def map_op(self, batch_parsed_features):
    batch_labels = tf.sparse_tensor_to_dense(
        tf.sparse_merge(batch_parsed_features["label"],
                        batch_parsed_features["label_values"],
                        self.label_size))
    user_nn_layer_output = self.get_user_embedding_list(
        batch_parsed_features, need_dropout=False)
    logits = tf.matmul(user_nn_layer_output,
                       tf.transpose(self.tag_embedding_weight))
    logits = tf.nn.bias_add(logits, self.tag_embedding_biases)
    # Apply a sigmoid to the logits before returning them.
    predictions = tf.nn.sigmoid(logits, name='prediction')
    return predictions, batch_labels
def decode_example(protos, vocab_size):
    features = {'key': tf.FixedLenFeature([1], tf.int64),
                'indices': tf.VarLenFeature(dtype=tf.int64),
                'values': tf.VarLenFeature(dtype=tf.float32)}
    parsed_features = tf.parse_single_example(protos, features)
    values = tf.sparse_merge(parsed_features['indices'],
                             parsed_features['values'],
                             vocab_size=vocab_size)
    # Save the key to remap after batching.
    key = parsed_features['key']
    decoded_sparse_tensor = tf.SparseTensor(
        indices=tf.concat([values.indices, [key]], axis=0),
        values=tf.concat([values.values, [0.0]], axis=0),
        dense_shape=values.dense_shape)
    return decoded_sparse_tensor
def _sparse_joints_to_dense_one_dim(dense_shape, joint_indices, joints,
                                    num_joints):
    """Converts a sparse vector of joints in a single dimension to dense
    joints, and returns those dense joints.
    """
    sparse_joints = tf.sparse_merge(sp_ids=joint_indices,
                                    sp_values=joints,
                                    vocab_size=num_joints)
    dense_joints = tf.sparse_tensor_to_dense(sp_input=sparse_joints,
                                             default_value=0)
    return tf.reshape(tensor=dense_joints, shape=dense_shape), sparse_joints
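A hypothetical call of the helper above, sketching one batch row with x-coordinates for 4 joints of which only joints 0 and 2 are labelled (all names, sizes, and values here are made up):

import tensorflow as tf

joint_indices = tf.SparseTensor(indices=[[0, 0], [0, 1]],
                                values=tf.constant([0, 2], dtype=tf.int64),
                                dense_shape=[1, 2])
joints = tf.SparseTensor(indices=[[0, 0], [0, 1]],
                         values=[0.25, -0.5],
                         dense_shape=[1, 2])
dense_joints, sparse_joints = _sparse_joints_to_dense_one_dim(
    dense_shape=[1, 4], joint_indices=joint_indices,
    joints=joints, num_joints=4)

with tf.Session() as sess:
    print(sess.run(dense_joints))  # [[ 0.25  0.   -0.5   0.  ]]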
def decode_example(protos, vocab_size):
    features = {"key": tf.FixedLenFeature(shape=[1], dtype=tf.int64),
                "indices": tf.VarLenFeature(dtype=tf.int64),
                "values": tf.VarLenFeature(dtype=tf.float32)}
    parsed_features = tf.parse_single_example(serialized=protos,
                                              features=features)
    values = tf.sparse_merge(sp_ids=parsed_features["indices"],
                             sp_values=parsed_features["values"],
                             vocab_size=vocab_size)
    key = parsed_features["key"]
    decoded_sparse_tensor = tf.SparseTensor(
        indices=tf.concat(values=[values.indices, [key]], axis=0),
        values=tf.concat(values=[values.values, [0.0]], axis=0),
        dense_shape=values.dense_shape)
    return decoded_sparse_tensor
def map_op(self, batch_parsed_features):
    # Prediction op.
    with tf.name_scope("map_op"):
        batch_labels = tf.sparse_tensor_to_dense(
            tf.sparse_merge(batch_parsed_features["label"],
                            batch_parsed_features["label_values"],
                            self.label_size))
        user_nn_layer_output = self.get_user_embedding_list(
            batch_parsed_features, is_train=False)
        # tf.matmul performs matrix (tensor) multiplication.
        logits = tf.matmul(user_nn_layer_output,
                           tf.transpose(self.tag_embedding_weight))
        logits = tf.nn.bias_add(logits, self.tag_embedding_biases)
        predictions = tf.nn.sigmoid(logits, name='prediction')
        return predictions, batch_labels
def decode_example(protos, vocab_size):
    features = {"key": tf.FixedLenFeature(shape=[1], dtype=tf.int64),
                "indices": tf.VarLenFeature(dtype=tf.int64),
                "values": tf.VarLenFeature(dtype=tf.float32)}
    parsed_features = tf.parse_single_example(serialized=protos,
                                              features=features)
    values = tf.sparse_merge(sp_ids=parsed_features["indices"],
                             sp_values=parsed_features["values"],
                             vocab_size=vocab_size)
    # Save the key to remap after batching.
    # This is a temporary workaround to assign correct row numbers in each
    # batch. You can ignore the details of this part and remap_keys().
    key = parsed_features["key"]
    decoded_sparse_tensor = tf.SparseTensor(
        indices=tf.concat(values=[values.indices, [key]], axis=0),
        values=tf.concat(values=[values.values, [0.0]], axis=0),
        dense_shape=values.dense_shape)
    return decoded_sparse_tensor
def parse_tfrecords_new(filename, vocab_size):
    if mode == tf.estimator.ModeKeys.TRAIN:
        batch_size = args['batch_size']
    else:
        batch_size = 1
    parsed_features = tf.contrib.learn.io.read_batch_features(
        os.path.join(args['input_path'], filename), batch_size,
        {'key': tf.FixedLenFeature([1], tf.int64),
         'indices': tf.VarLenFeature(dtype=tf.int64),
         'values': tf.VarLenFeature(dtype=tf.float32)},
        tf.TFRecordReader)
    keys = parsed_features['key']
    values = tf.sparse_merge(parsed_features['indices'],
                             parsed_features['values'],
                             vocab_size=vocab_size)
    return values
def _raw_features_to_dense_tensor(raw_features):
    """Convert the raw features expressing a sparse vector to a dense tensor.

    Args:
      raw_features: Parsed features in sparse matrix format.

    Returns:
      A dense tensor populated with the raw features.
    """
    # Load the vocabulary here, as each batch of examples is parsed, to
    # ensure that the examples and the mapping table are located in the same
    # TensorFlow graph.
    measurement_table = tf.contrib.lookup.index_table_from_file(
        vocabulary_file=FLAGS.vocabulary_file)
    tf.logging.info("Loaded vocabulary file %s with %s terms.",
                    FLAGS.vocabulary_file, str(measurement_table.size()))
    indices = measurement_table.lookup(raw_features[MEASUREMENTS_FEATURE])
    merged = tf.sparse_merge(indices,
                             raw_features[VALUES_FEATURE],
                             vocab_size=measurement_table.size())
    return tf.sparse_tensor_to_dense(merged)
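A minimal sketch of the lookup-then-merge step above: string feature names are mapped to integer ids by a vocabulary table, and those ids drive tf.sparse_merge. The vocabulary file name, measurement names, and values below are made up:

import tensorflow as tf

# Hypothetical vocabulary file with one measurement name per line.
table = tf.contrib.lookup.index_table_from_file(
    vocabulary_file="measurements.txt")
names = tf.SparseTensor(indices=[[0, 0], [0, 1]],
                        values=["heart_rate", "bmi"],
                        dense_shape=[1, 2])
vals = tf.SparseTensor(indices=[[0, 0], [0, 1]],
                       values=[72.0, 23.5],
                       dense_shape=[1, 2])
ids = table.lookup(names)  # SparseTensor of int64 ids, same indices
dense = tf.sparse_tensor_to_dense(
    tf.sparse_merge(ids, vals, vocab_size=table.size()))

with tf.Session() as sess:
    sess.run(tf.tables_initializer())  # lookup tables need initialization
    print(sess.run(dense))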
def get_user_embedding_list(self, batch_parsed_features, combiner='mean',
                            need_dropout=False):
    embedding_list = []
    user_long_embedding_size = 0
    for field_name in self.sparse_field_name_list:
        # Embedding layer; AUTO_REUSE shares the variables across calls.
        with tf.variable_scope(field_name + "_embedding_layer",
                               reuse=tf.AUTO_REUSE):
            field_sparse_ids = batch_parsed_features[field_name]
            field_sparse_values = batch_parsed_features[field_name + "_values"]
            if combiner == 'mean':
                embedding = tf.nn.embedding_lookup_sparse(
                    self.tag_embedding_weight, field_sparse_ids,
                    field_sparse_values, combiner="mean")
            elif combiner == 'avg':
                # Manual mean: sum the embeddings, then divide by the
                # per-row count of active features, i.e.
                # embedding / (sparse_features x tag_ones).
                embedding = tf.nn.embedding_lookup_sparse(
                    self.tag_embedding_weight, field_sparse_ids,
                    field_sparse_values, combiner="sum")
                sparse_features = tf.sparse_merge(
                    field_sparse_ids, field_sparse_values,
                    vocab_size=self.label_size)
                sparse_x_feature_cnt = tf.sparse_tensor_dense_matmul(
                    sparse_features, self.tag_ones)
                embedding = tf.div(embedding, sparse_x_feature_cnt)
            embedding_list.append(embedding)
            user_long_embedding_size += self.embedding_dim
    for i, field_name in enumerate(self.dense_field_name_list):
        # Dense layer: decode the packed float features.
        with tf.variable_scope(field_name + "_dense_layer",
                               reuse=tf.AUTO_REUSE):
            field_dense_feature_values = tf.decode_raw(
                batch_parsed_features[field_name], tf.float32)
            embedding_list.append(field_dense_feature_values)
            user_long_embedding_size += self.dense_field_size[i]
    user_long_embedding = tf.concat(embedding_list, 1)
    user_long_embedding = tf.reshape(user_long_embedding,
                                     shape=[-1, user_long_embedding_size])
    print("user_long_embedding_size=" + str(user_long_embedding_size))
    # NN layers on top of the concatenated embedding.
    with tf.variable_scope("user_nn_layer"):
        input_layer_output = layers.get_nn_layer_v2(
            user_long_embedding, user_long_embedding_size,
            self.nn_layer_shape, activation=self.activation,
            need_dropout=need_dropout)
    return input_layer_output
def next_batch(self):
    batch = tf.train.batch([self.serialized_example], self.batch_size)
    features = tf.parse_example(batch, features={
        'fea_indices_row': tf.VarLenFeature(tf.int64),
        'fea_indices_col': tf.VarLenFeature(tf.int64),
        'fea_values': tf.VarLenFeature(tf.float32),
        'seq_len': tf.FixedLenFeature([1], tf.int64),
        'label': tf.FixedLenFeature([1], tf.int64),
        'forloeb': tf.FixedLenFeature([1], tf.int64),
        'ctx_indices': tf.VarLenFeature(tf.int64),
        'ctx_values': tf.VarLenFeature(tf.float32),
        'text_indices_row': tf.VarLenFeature(tf.int64),
        'text_indices_col': tf.VarLenFeature(tf.int64),
        'text_indices_vals': tf.VarLenFeature(tf.int64),
        'text_len_idx': tf.VarLenFeature(tf.int64),
        'text_len': tf.VarLenFeature(tf.int64),
        'doc_ids': tf.VarLenFeature(tf.int64)
    })

    seq_len = tf.sparse_reduce_max(features['fea_indices_row'], axis=1) + 10
    max_seq_len = tf.reduce_max(
        tf.stack([
            tf.reduce_max(seq_len) - 1,
            tf.reduce_max(features['text_indices_row'].values)
        ])) + 1
    fea_dim = [max_seq_len, self.feature_size]

    # Read values and construct sparse tensors.
    sparse_indices_row = features['fea_indices_row']
    sparse_indices_col = features['fea_indices_col']
    sparse_vals = features['fea_values']
    feature_tensor = sparse_merge.sparse_merge(
        [sparse_indices_row, sparse_indices_col], sparse_vals, fea_dim)

    text_indices_row = features['text_indices_row']
    text_indices_col = features['text_indices_col']
    text_indices_vals = features['text_indices_vals']
    max_text_len = tf.cond(
        tf.equal(text_indices_col.dense_shape[1], tf.constant(0, tf.int64)),
        lambda: tf.constant(0, tf.int64),
        lambda: tf.add(tf.reduce_max(text_indices_col.values), 1))
    text_tensor = sparse_merge.sparse_merge(
        [text_indices_row, text_indices_col], text_indices_vals,
        [max_seq_len, max_text_len])

    text_len_tensor = sparse_merge.sparse_merge([features['text_len_idx']],
                                                features['text_len'],
                                                [max_seq_len])
    doc_ids_tensor = sparse_merge.sparse_merge([features['text_len_idx']],
                                               features['doc_ids'],
                                               [max_seq_len])
    ctx_tensor = tf.sparse_merge(features['ctx_indices'],
                                 features['ctx_values'],
                                 self.context_size)

    return {
        'features': feature_tensor,
        'context': ctx_tensor,
        'seq_len': seq_len,
        'label': features['label'],
        'forloeb': features['forloeb'],
        'text': text_tensor,
        'text_len': text_len_tensor,
        'doc_ids': doc_ids_tensor
    }
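Note that sparse_merge.sparse_merge above is a project-local helper, not tf.sparse_merge: it merges separate per-axis index SparseTensors and a value SparseTensor into one multi-dimensional SparseTensor. A hypothetical reconstruction under that reading (the real helper may well differ, e.g. by reordering entries; this sketch assumes the inputs already share the same element order, row-major):

import tensorflow as tf

def sparse_merge(index_tensors, value_tensor, dense_shape):
    """Hypothetical sketch: build an n-d SparseTensor from per-axis indices.

    index_tensors: list of SparseTensors whose values hold one coordinate
        axis each; value_tensor: SparseTensor holding the actual values.
    """
    # Stack the per-axis coordinates into an [nnz, ndims] index matrix.
    coords = tf.stack([tf.cast(t.values, tf.int64) for t in index_tensors],
                      axis=1)
    shape = tf.stack([tf.cast(tf.convert_to_tensor(d), tf.int64)
                      for d in dense_shape])
    return tf.SparseTensor(indices=coords, values=value_tensor.values,
                           dense_shape=shape)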
def get_user_embedding_list(self, batch_parsed_features, combiner='mean',
                            is_train=True):
    embedding_list = []
    user_long_embedding_size = 0
    for field_name in self.sparse_field_name_list:
        # Embedding layer; AUTO_REUSE shares the variables across calls.
        with tf.variable_scope(field_name + "_embedding_layer",
                               reuse=tf.AUTO_REUSE):
            field_sparse_ids = batch_parsed_features[field_name]
            field_sparse_values = batch_parsed_features[field_name + "_values"]
            # Combiner.
            if combiner == 'mean':
                embedding = tf.nn.embedding_lookup_sparse(
                    self.tag_embedding_weight, field_sparse_ids,
                    field_sparse_values, combiner="mean")
            elif combiner == 'avg':
                # Manual mean: sum the embeddings, then divide by the
                # per-row count of active features, i.e.
                # embedding / (sparse_features x tag_ones).
                embedding = tf.nn.embedding_lookup_sparse(
                    self.tag_embedding_weight, field_sparse_ids,
                    field_sparse_values, combiner="sum")
                sparse_features = tf.sparse_merge(
                    field_sparse_ids, field_sparse_values,
                    vocab_size=self.label_size)
                sparse_x_feature_cnt = tf.sparse_tensor_dense_matmul(
                    sparse_features, self.tag_ones)
                embedding = tf.div(embedding, sparse_x_feature_cnt)
            embedding_list.append(embedding)
            user_long_embedding_size += self.embedding_dim
    for i, field_name in enumerate(self.dense_field_name_list):
        # Dense layer: decode the packed float features with tf.decode_raw.
        with tf.variable_scope(field_name + "_dense_layer",
                               reuse=tf.AUTO_REUSE):
            field_dense_feature_values = tf.decode_raw(
                batch_parsed_features[field_name], tf.float32)
            embedding_list.append(field_dense_feature_values)
            user_long_embedding_size += self.dense_field_size[i]
    user_long_embedding = tf.concat(embedding_list, 1)
    print("user_long_embedding_size=" + str(user_long_embedding_size))
    # NN layers (layers.get_nn_layer_v2) on top of the concatenated embedding.
    with tf.variable_scope("user_nn_layer"):
        input_layer_output = layers.get_nn_layer_v2(
            user_long_embedding, user_long_embedding_size,
            self.nn_layer_shape, activation=self.activation,
            l2_reg=self.l2_reg,
            need_dropout=(is_train and self.need_dropout))
    # TODO: work out exactly what this function does.
    return input_layer_output
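The manual-mean trick used in the 'avg' branches above divides a summed embedding by the per-row count of active features, obtained by multiplying the [batch, vocab] indicator matrix from tf.sparse_merge with a [vocab, 1] column of ones. A minimal standalone sketch (weights, tag_ones, and all sizes below are made-up stand-ins for self.tag_embedding_weight and self.tag_ones):

import tensorflow as tf

vocab_size, embedding_dim = 6, 3
weights = tf.get_variable("weights", shape=[vocab_size, embedding_dim])
tag_ones = tf.ones([vocab_size, 1])

ids = tf.SparseTensor(indices=[[0, 0], [0, 1], [1, 0]],
                      values=tf.constant([2, 5, 1], dtype=tf.int64),
                      dense_shape=[2, 2])
vals = tf.SparseTensor(indices=[[0, 0], [0, 1], [1, 0]],
                       values=[1.0, 1.0, 1.0],
                       dense_shape=[2, 2])

summed = tf.nn.embedding_lookup_sparse(weights, ids, vals, combiner="sum")
counts = tf.sparse_tensor_dense_matmul(
    tf.sparse_merge(ids, vals, vocab_size=vocab_size), tag_ones)
mean_embedding = tf.div(summed, counts)  # matches combiner="mean" here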