def loss_fn(emb):
  # `sp_ids` and `x` are captured from the enclosing scope.
  embedding = embedding_ops.safe_embedding_lookup_sparse(
      emb, sp_ids, None, combiner='sum')
  pred0 = math_ops.matmul(embedding, x)
  return pred0 * pred0
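For context, `loss_fn` closes over `sp_ids` and `x`. A minimal sketch of that enclosing setup (an assumption, modeled on the fuller minimize tests at the end of this section, and presuming the usual test imports: `numpy as np`, `constant_op`, `dtypes`, `sparse_tensor`):

dim = 4
# A batch of 3 examples with up to 2 ids each (assumed values).
sp_ids = sparse_tensor.SparseTensor(
    indices=[[0, 0], [0, 1], [1, 0], [2, 1]],
    values=constant_op.constant([1, 3, 3, 9], dtype=dtypes.int64),
    dense_shape=[3, 2])
x = constant_op.constant(np.random.rand(dim, 1), dtype=dtypes.float32)
# loss_fn(weights) then performs a (3, dim) lookup, multiplies by the
# (dim, 1) vector x, and squares the result elementwise.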
def test_safe_embedding_lookup_sparse_3d_return_zero_vector(self):
  with self.cached_session():
    embedding_weights = self._random_weights()
    sparse_ids, sparse_weights = self._ids_and_weights_3d()

    embedding_lookup_result = (embedding_ops.safe_embedding_lookup_sparse(
        embedding_weights, sparse_ids, sparse_weights).eval())

    self.assertAllClose(embedding_lookup_result, [[
        (1.0 * embedding_weights[0][0] + 2.0 * embedding_weights[0][1]) / 3.0,
        [0] * 4, [0] * 4
    ], [embedding_weights[0][2], [0] * 4, [0] * 4]])
def test_safe_embedding_lookup_sparse_partitioned(self):
  with self.cached_session():
    embedding_weights = self._random_weights(num_shards=3)
    sparse_ids, _ = self._ids_and_weights_2d()

    embedding_lookup_result = (embedding_ops.safe_embedding_lookup_sparse(
        embedding_weights, sparse_ids, None).eval())

    # With the default "div" partition strategy, ids map to the shards in
    # contiguous blocks, so flattening the shards in order recovers the
    # full table.
    embedding_weights = list(itertools.chain(*embedding_weights))
    self.assertAllClose(embedding_lookup_result,
                        [(embedding_weights[0] + embedding_weights[1]) / 2.0,
                         [0] * 4, [0] * 4, embedding_weights[2],
                         (embedding_weights[0] + embedding_weights[1]) / 2.0])
def test_safe_embedding_lookup_sparse_no_weights(self):
  with self.cached_session():
    embedding_weights = self._random_weights()
    sparse_ids, _ = self._ids_and_weights_2d()

    embedding_lookup_result = (embedding_ops.safe_embedding_lookup_sparse(
        embedding_weights, sparse_ids, None).eval())

    self.assertAllClose(
        embedding_lookup_result,
        [(embedding_weights[0][0] + embedding_weights[0][1]) / 2.0, [0] * 4,
         [0] * 4, embedding_weights[0][2],
         (embedding_weights[0][0] + embedding_weights[0][1]) / 2.0])
def test_safe_embedding_lookup_sparse_return_special_vector(self):
  with self.cached_session():
    embedding_weights = self._random_weights()
    sparse_ids, sparse_weights = self._ids_and_weights_2d()

    embedding_lookup_result = (embedding_ops.safe_embedding_lookup_sparse(
        embedding_weights, sparse_ids, sparse_weights, default_id=3).eval())

    self.assertAllClose(
        embedding_lookup_result,
        [(1.0 * embedding_weights[0][0] + 2.0 * embedding_weights[0][1]) / 3.0,
         embedding_weights[0][3], embedding_weights[0][3],
         embedding_weights[0][2], embedding_weights[0][3]])
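The padding behavior these tests exercise can be reproduced standalone (an illustrative sketch with assumed names and values, using the public `tf.nn.safe_embedding_lookup_sparse` wrapper): rows with no valid ids come back as all zeros, or as the `default_id` row when one is given.

import tensorflow as tf

weights = tf.reshape(tf.range(20, dtype=tf.float32), [5, 4])  # 5 ids, dim 4
sparse_ids = tf.SparseTensor(
    indices=[[0, 0], [2, 0]],                     # row 1 has no entries
    values=tf.constant([1, -1], dtype=tf.int64),  # -1 is invalid and pruned
    dense_shape=[3, 2])

out = tf.nn.safe_embedding_lookup_sparse(weights, sparse_ids)
# row 0 -> weights[1]; rows 1 and 2 -> all zeros

filled = tf.nn.safe_embedding_lookup_sparse(weights, sparse_ids, default_id=3)
# rows 1 and 2 -> weights[3] instead of zeros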
def test_safe_embedding_lookup_sparse_shape_checking(self):
  with self.session(use_gpu=test_util.is_gpu_available(),
                    config=default_config):
    embed_dim = 4
    embedding_weights_nn = variable_scope.get_variable(
        "n", shape=[100, embed_dim], use_resource=False)
    embedding_weights_de = _random_weights(embed_dim=4)
    sparse_ids, _ = ids_and_weights_3d(embed_dim=embed_dim)

    embedding_lookup_base = embedding_ops.safe_embedding_lookup_sparse(
        embedding_weights_nn, sparse_ids, None)
    embedding_lookup_test = de.safe_embedding_lookup_sparse(
        embedding_weights_de, sparse_ids, None)
    self.assertAllEqual(embedding_lookup_base.shape,
                        embedding_lookup_test.shape)
    self.assertAllEqual(embedding_lookup_base.get_shape(),
                        embedding_lookup_test.get_shape())
def call(self, inputs):
    # When saving a model that contains this layer, `tf.saved_model.save`
    # feeds the inputs as a dense `Tensor`, not a `SparseTensor`, so convert
    # the `Tensor` to a `SparseTensor` first (zeros are treated as padding).
    if not isinstance(inputs, tf.SparseTensor):
        idx = tf.where(tf.not_equal(inputs, 0))
        inputs = tf.SparseTensor(idx, tf.gather_nd(inputs, idx),
                                 (-1, self.input_dim))
    dtype = K.dtype(inputs)
    if dtype != "int32" and dtype != "int64":
        inputs = math_ops.cast(inputs, "int32")
    out = embedding_ops.safe_embedding_lookup_sparse(
        embedding_weights=self.embeddings,
        sparse_ids=inputs,
        sparse_weights=None,
        combiner=self.combiner,
    )
    return out
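The same dense-to-sparse conversion as a standalone sketch (shapes and values assumed). Note the design choice it inherits: id 0 is reserved for padding, so a legitimate id 0 would be dropped.

import tensorflow as tf

dense_ids = tf.constant([[3, 1, 0],
                         [2, 0, 0]], dtype=tf.int64)  # zero-padded ids
idx = tf.where(tf.not_equal(dense_ids, 0))
sparse_ids = tf.SparseTensor(
    idx, tf.gather_nd(dense_ids, idx),
    dense_shape=tf.shape(dense_ids, out_type=tf.int64))

embeddings = tf.Variable(tf.random.normal([10, 4]))  # vocab 10, dim 4
out = tf.nn.safe_embedding_lookup_sparse(embeddings, sparse_ids,
                                         combiner="mean")
# out: (2, 4); row i averages the embeddings of example i's non-zero ids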
def output_logits_from_linear(features, embedding_table, params):
    field2vocab_mapping = params['field_vocab_mapping']
    combiner = params.get('multi_embed_combiner', 'sum')

    fields_outputs = []
    # Each field holds a series of <tag:value> pairs, and each tag maps to a
    # bias to be optimized. The biases of all tags are averaged, weighted by
    # their values, to produce the bias for the field.
    for fieldname, vocabname in field2vocab_mapping.items():
        sp_ids = features[fieldname + "_ids"]
        sp_values = features[fieldname + "_values"]

        linear_weights = embedding_table.get_linear_weights(
            vocab_name=vocabname)

        # weights: [vocab_size, 1]
        # sp_ids: [batch_size, max_tags_per_example]
        # sp_weights: [batch_size, max_tags_per_example]
        # output: [batch_size, 1]
        output = embedding_ops.safe_embedding_lookup_sparse(
            linear_weights,
            sp_ids,
            sp_values,
            combiner=combiner,
            name='{}_linear_output'.format(fieldname))
        fields_outputs.append(output)

    # Different fields may share one vocab's linear weights, so summing the
    # per-field outputs would lose a lot of information. Instead, concatenate
    # them: each field's output is [batch_size, 1], so the concatenation
    # costs little space.
    # whole_linear_output: [batch_size, total_fields]
    whole_linear_output = tf.concat(fields_outputs, axis=1)
    tf.logging.info("linear output, shape={}".format(
        whole_linear_output.shape))

    # Map to the final logits (binary classification, so [batch_size, 1]).
    # Do not apply any activation here, especially not ReLU.
    return tf.layers.dense(whole_linear_output,
                           units=1,
                           use_bias=True,
                           activation=None)
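For reference, the combiner semantics that make this weighted averaging work (the documented behavior of `safe_embedding_lookup_sparse`, with per-example weights $w_j$, ids $i_j$, and table $W$):

$$\texttt{sum}:\ \sum_j w_j W_{i_j}\qquad \texttt{mean}:\ \frac{\sum_j w_j W_{i_j}}{\sum_j w_j}\qquad \texttt{sqrtn}:\ \frac{\sum_j w_j W_{i_j}}{\sqrt{\sum_j w_j^{2}}}$$

So with `combiner='mean'` the result is exactly the value-weighted average of the per-tag biases described in the comment; the default here, `'sum'`, yields the value-weighted sum.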
def build_input(features, params):
    cat_columns = params['cat_columns']
    val_columns = params['val_columns']
    column_to_field = params['column_to_field']
    #dnn_columns = params['dnn_columns']
    dimension_config = params['dimension_config']
    reg = params['reg']
    embed_dim = params['embed_dim']
    embedding_table = EmbeddingTable()
    embedding_dict = OrderedDict()
    with tf.variable_scope("fm", reuse=tf.AUTO_REUSE, values=[features]) as scope:
        with tf.device('/cpu:0'):
            for name, col in cat_columns.items():
                field = column_to_field.get(name, name)
                cur_dimension = dimension_config[field] if field in dimension_config else embed_dim
                embedding_table.add_linear_weights(vocab_name=name,
                                                   vocab_size=col._num_buckets)
                embedding_table.add_embed_weights(vocab_name=field,
                                                  vocab_size=col._num_buckets,
                                                  embed_dim=cur_dimension,
                                                  reg=reg)
            for name, col in val_columns.items():
                field = column_to_field.get(name, name)
                cur_dimension = dimension_config[field] if field in dimension_config else embed_dim
                embedding_table.add_linear_weights(vocab_name=name,
                                                   vocab_size=1)
                embedding_table.add_embed_weights(vocab_name=field,
                                                  vocab_size=1,
                                                  embed_dim=cur_dimension,
                                                  reg=reg)

        builder = _LazyBuilder(features)
        # linear part
        linear_outputs = []
        for name, col in cat_columns.items():
            # get sparse tensor of input feature from feature column
            sp_tensor = col._get_sparse_tensors(builder)
            sp_ids = sp_tensor.id_tensor
            linear_weights = embedding_table.get_linear_weights(name)
            # linear_weights: (vocab_size, 1)
            # sp_ids: (batch_size, max_tokens_per_example)
            # sp_values: (batch_size, max_tokens_per_example)
            linear_output = embedding_ops.safe_embedding_lookup_sparse(
                linear_weights,
                sp_ids,
                None,
                combiner='sum',
                name='{}_linear_output'.format(name))
            linear_outputs.append(linear_output)
        for name, col in val_columns.items():
            dense_tensor = col._get_dense_tensor(builder)
            linear_weights = embedding_table.get_linear_weights(name)
            linear_output = tf.multiply(dense_tensor, linear_weights)
            linear_outputs.append(linear_output)
        # linear_outputs: (batch_size, nonzero_feature_num)
        linear_outputs = tf.concat(linear_outputs, axis=1)

        # poly part
        for name, col in cat_columns.items():
            # get sparse tensor of input feature from feature column
            field = column_to_field.get(name, name)
            sp_tensor = col._get_sparse_tensors(builder)
            sp_ids = sp_tensor.id_tensor
            embed_weights = embedding_table.get_embed_weights(field)
            # embeddings: (batch_size, embed_dim)
            # x_i * v_i
            embeddings = embedding_ops.safe_embedding_lookup_sparse(
                embed_weights,
                sp_ids,
                None,
                combiner='sum',
                name='{}_{}_embedding'.format(field, name))
            embedding_dict[field] = embeddings
        for name, col in val_columns.items():
            field = column_to_field.get(name, name)
            dense_tensor = col._get_dense_tensor(builder)
            embed_weights = embedding_table.get_embed_weights(field)
            embeddings = tf.multiply(dense_tensor, embed_weights)
            embedding_dict[field] = embeddings

    with tf.variable_scope("dnn_embed"):
        x = tf.concat(list(embedding_dict.values()), axis=1)
        N = len(embedding_dict)
        T = sum([
            embedding.get_shape().as_list()[1]
            for embedding in embedding_dict.values()
        ])
        print("wkfm N:", N, " T:", T)
        indices = []
        for i, embeddings in enumerate(embedding_dict.values()):
            dim = embeddings.get_shape().as_list()[1]
            indices.extend([i] * dim)
        outputs = []
        for field, embeddings in embedding_dict.items():
            di = dimension_config[field] if field in dimension_config else embed_dim
            U = tf.get_variable('{}_wkfm'.format(field),
                                [T, di],
                                initializer=tf.glorot_normal_initializer(),
                                trainable=True)
            wkfm_weights = tf.get_variable('{}_wkfm_weights'.format(field),
                                           [N],
                                           initializer=tf.ones_initializer,
                                           trainable=True)
            weights = tf.gather(wkfm_weights, indices)
            y = tf.matmul(weights * x, U)
            outputs.append(y)
        y = tf.concat(outputs, axis=1)
        y = x * y
        new_inputs = tf.concat([linear_outputs, y], 1)
        return new_inputs
def build_input(features, params):
    cat_columns = params['cat_columns']
    val_columns = params['val_columns']
    column_to_field = params['column_to_field']
    dnn_columns = params['dnn_columns']
    reg = params['reg']
    embed_dim = params['embed_dim']
    dnn_part = tf.feature_column.input_layer(features, dnn_columns)
    embedding_table = EmbeddingTable()
    with tf.variable_scope("fm", reuse=tf.AUTO_REUSE, values=[features]) as scope:
        with tf.device('/cpu:0'):
            for name, col in cat_columns.items():
                field = column_to_field.get(name, name)
                embedding_table.add_linear_weights(vocab_name=name,
                                                   vocab_size=col._num_buckets)
                embedding_table.add_embed_weights(vocab_name=field,
                                                  vocab_size=col._num_buckets,
                                                  embed_dim=embed_dim,
                                                  reg=reg)
            for name, col in val_columns.items():
                field = column_to_field.get(name, name)
                embedding_table.add_linear_weights(vocab_name=name,
                                                   vocab_size=1)
                embedding_table.add_embed_weights(vocab_name=field,
                                                  vocab_size=1,
                                                  embed_dim=embed_dim,
                                                  reg=reg)

        builder = _LazyBuilder(features)
        # linear part
        linear_outputs = []
        for name, col in cat_columns.items():
            # get sparse tensor of input feature from feature column
            sp_tensor = col._get_sparse_tensors(builder)
            sp_ids = sp_tensor.id_tensor
            linear_weights = embedding_table.get_linear_weights(name)
            # linear_weights: (vocab_size, 1)
            # sp_ids: (batch_size, max_tokens_per_example)
            # sp_values: (batch_size, max_tokens_per_example)
            linear_output = embedding_ops.safe_embedding_lookup_sparse(
                linear_weights,
                sp_ids,
                None,
                combiner='sum',
                name='{}_linear_output'.format(name))
            linear_outputs.append(linear_output)
        for name, col in val_columns.items():
            dense_tensor = col._get_dense_tensor(builder)
            linear_weights = embedding_table.get_linear_weights(name)
            linear_output = tf.multiply(dense_tensor, linear_weights)
            linear_outputs.append(linear_output)
        # linear_outputs: (batch_size, nonzero_feature_num)
        linear_outputs = tf.concat(linear_outputs, axis=1)

        # poly part
        sum_then_square = []
        square_then_sum = []
        for name, col in cat_columns.items():
            # get sparse tensor of input feature from feature column
            field = column_to_field.get(name, name)
            sp_tensor = col._get_sparse_tensors(builder)
            sp_ids = sp_tensor.id_tensor
            embed_weights = embedding_table.get_embed_weights(field)
            # embeddings: (batch_size, embed_dim)
            # x_i * v_i
            embeddings = embedding_ops.safe_embedding_lookup_sparse(
                embed_weights,
                sp_ids,
                None,
                combiner='sum',
                name='{}_{}_embedding'.format(field, name))
            sum_then_square.append(embeddings)
            square_then_sum.append(tf.square(embeddings))
        for name, col in val_columns.items():
            field = column_to_field.get(name, name)
            dense_tensor = col._get_dense_tensor(builder)
            embed_weights = embedding_table.get_embed_weights(field)
            embeddings = tf.multiply(dense_tensor, embed_weights)
            sum_then_square.append(embeddings)
            square_then_sum.append(tf.square(embeddings))
        # sum_then_square: (batch_size, embedding)
        sum_then_square = tf.square(tf.add_n(sum_then_square))
        # square_then_sum: (batch_size, embedding)
        square_then_sum = tf.add_n(square_then_sum)
        poly_outputs = 0.5 * tf.subtract(sum_then_square, square_then_sum)
        new_inputs = tf.concat([linear_outputs, poly_outputs, dnn_part], 1)
        return new_inputs, embedding_table
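The "square of sum minus sum of squares" step above is the standard linear-time rewriting of the FM pairwise-interaction term. With per-field embedding vectors $e_f = x_f v_f$ and $\odot$ denoting the element-wise product:

$$\sum_{f<g} e_f \odot e_g \;=\; \frac{1}{2}\Big[\Big(\sum_f e_f\Big)^{\odot 2} - \sum_f e_f^{\odot 2}\Big]$$

`poly_outputs` is this vector; the pure-FM model further down reduces it over the embedding dimension to obtain a scalar logit.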
def build_dfm(features, labels, mode, params):
    """model fn"""
    # build embedding tables
    dnn_data = tf.feature_column.input_layer(features, params['dnn_columns'])
    dimension = params['embedding_size']
    cat_columns = params['cat_columns']
    val_columns = params['val_columns']
    column_to_field = params['column_to_field']
    reg = params['reg']
    hidden_units = params['hidden_units']
    weight = tf.feature_column.input_layer(features, params['weight_columns'])
    embedding_table = EmbeddingTable()
    with tf.variable_scope("dfm", reuse=tf.AUTO_REUSE, values=[features]) as scope:
        for name, col in cat_columns.items():
            field = column_to_field.get(name, name)
            embedding_table.add_linear_weights(vocab_name=name,
                                               vocab_size=col._num_buckets)
            embedding_table.add_embed_weights(vocab_name=field,
                                              vocab_size=col._num_buckets,
                                              embed_dim=dimension,
                                              reg=reg)
        for name, col in val_columns.items():
            field = column_to_field.get(name, name)
            embedding_table.add_linear_weights(vocab_name=name, vocab_size=1)
            embedding_table.add_embed_weights(vocab_name=field,
                                              vocab_size=1,
                                              embed_dim=dimension,
                                              reg=reg)

        # bias
        bias = tf.get_variable(name='bias',
                               shape=(1,),
                               initializer=tf.constant_initializer(0.0))
        builder = _LazyBuilder(features)

        # linear part
        linear_outputs = []
        for name, col in cat_columns.items():
            # get sparse tensor of input feature from feature column
            sp_tensor = col._get_sparse_tensors(builder)
            sp_ids = sp_tensor.id_tensor
            linear_weights = embedding_table.get_linear_weights(name)
            # linear_weights: (vocab_size, 1)
            # sp_ids: (batch_size, max_tokens_per_example)
            # sp_values: (batch_size, max_tokens_per_example)
            # output: (batch_size, 1)
            output = embedding_ops.safe_embedding_lookup_sparse(
                linear_weights,
                sp_ids,
                None,
                combiner='sum',
                name='{}_linear_output'.format(name))
            linear_outputs.append(output)
        for name, col in val_columns.items():
            dense_tensor = col._get_dense_tensor(builder)
            linear_weights = embedding_table.get_linear_weights(name)
            output = tf.multiply(dense_tensor, linear_weights)
            linear_outputs.append(output)
        # linear_outputs: (batch_size, nonzero_feature_num)
        linear_outputs = tf.concat(linear_outputs, axis=1)
        '''
        # linear_logits: (batch_size, 1)
        linear_logits = tf.reduce_sum(linear_outputs,
                                      axis=1,
                                      keepdims=True,
                                      name='linear_logits')
        '''
        # poly part
        sum_then_square = []
        square_then_sum = []
        for name, col in cat_columns.items():
            # get sparse tensor of input feature from feature column
            field = column_to_field.get(name, name)
            sp_tensor = col._get_sparse_tensors(builder)
            sp_ids = sp_tensor.id_tensor
            embed_weights = embedding_table.get_embed_weights(field)
            # embeddings: (batch_size, embed_dim)
            # x_i * v_i
            embeddings = embedding_ops.safe_embedding_lookup_sparse(
                embed_weights,
                sp_ids,
                None,
                combiner='sum',
                name='{}_{}_embedding'.format(field, name))
            sum_then_square.append(embeddings)
            square_then_sum.append(tf.square(embeddings))
        for name, col in val_columns.items():
            field = column_to_field.get(name, name)
            dense_tensor = col._get_dense_tensor(builder)
            embed_weights = embedding_table.get_embed_weights(field)
            embeddings = tf.multiply(dense_tensor, embed_weights)
            sum_then_square.append(embeddings)
            square_then_sum.append(tf.square(embeddings))
        print("feat num:", len(sum_then_square))
        deep_inputs = tf.concat(sum_then_square, axis=1)
        deep_inputs = tf.concat([deep_inputs, dnn_data], axis=1)
        # sum_then_square: (batch_size, embedding)
        sum_then_square = tf.square(tf.add_n(sum_then_square))
        # square_then_sum: (batch_size, embedding)
        square_then_sum = tf.add_n(square_then_sum)
        # poly_outputs: (batch_size, embedding)
        poly_outputs = 0.5 * tf.subtract(sum_then_square, square_then_sum)
        fm_part = tf.concat([linear_outputs, poly_outputs], 1)
        #dnn_logits = build_deep_layers(deep_inputs, hidden_units, mode, reg)
        logits = build_deepfm_layers(fm_part, deep_inputs, hidden_units,
                                     mode, reg)
        # logits: (batch_size, 1)
        #logits = linear_logits + poly_logits + dnn_logits
        # predictions
        logits = tf.nn.bias_add(logits, bias)
        preds = tf.nn.sigmoid(logits)
        tf.summary.histogram("preds", preds)
        # PREDICT mode
        if mode == tf.estimator.ModeKeys.PREDICT:
            predictions = {'prob': tf.concat([1 - preds, preds], 1)}
            export_outputs = {
                tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY:
                    tf.estimator.export.PredictOutput(predictions)  # needed for online prediction
            }
            return tf.estimator.EstimatorSpec(mode,
                                              predictions=predictions,
                                              export_outputs=export_outputs)
        else:
            ctr_loss = tf.losses.log_loss(
                labels,
                preds,
                weights=weight,
                reduction=tf.losses.Reduction.SUM_OVER_BATCH_SIZE)
            optimizer = tf.train.AdamOptimizer(params['learning_rate'])
            ctr_auc = tf.metrics.auc(labels=labels, predictions=preds)
            reg_loss = tf.reduce_sum(
                tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
            tf.summary.scalar("loss/ctr_loss", ctr_loss)
            tf.summary.scalar("loss/reg_loss", reg_loss)
            loss = ctr_loss + reg_loss
            eval_metric_ops = {'auc': ctr_auc}
            update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
            with tf.control_dependencies(update_ops):
                train_op = optimizer.minimize(
                    loss, global_step=tf.train.get_global_step())
            return tf.estimator.EstimatorSpec(mode,
                                              loss=loss,
                                              train_op=train_op,
                                              eval_metric_ops=eval_metric_ops)
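`build_deepfm_layers` is defined elsewhere in this code base. Purely to illustrate the call contract above (a hypothetical sketch, not the author's implementation), such a helper might run an MLP over `deep_inputs`, concatenate the result with `fm_part`, and project to a single logit:

def build_deepfm_layers(fm_part, deep_inputs, hidden_units, mode, reg):
    """Hypothetical sketch only; the real helper is not shown here."""
    # `mode` could gate dropout or batch norm; unused in this sketch.
    net = deep_inputs
    for i, units in enumerate(hidden_units):
        net = tf.layers.dense(
            net,
            units=units,
            activation=tf.nn.relu,
            kernel_regularizer=tf.contrib.layers.l2_regularizer(reg),
            name='deep_{}'.format(i))
    combined = tf.concat([fm_part, net], axis=1)
    # No activation: the caller adds the bias and applies the sigmoid.
    return tf.layers.dense(combined, units=1, activation=None,
                           name='deepfm_logits')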
def build_input(features, params):
    cat_columns = params['cat_columns']
    val_columns = params['val_columns']
    column_to_field = params['column_to_field']
    #dnn_columns = params['dnn_columns']
    dimension_config = params['dimension_config']
    reg = params['reg']
    embed_dim = params['embed_dim']
    embedding_table = EmbeddingTable()
    embedding_dict = OrderedDict()
    with tf.variable_scope("fm", reuse=tf.AUTO_REUSE, values=[features]) as scope:
        with tf.device('/cpu:0'):
            for name, col in cat_columns.items():
                field = column_to_field.get(name, name)
                cur_dimension = dimension_config[field] if field in dimension_config else embed_dim
                embedding_table.add_linear_weights(vocab_name=name,
                                                   vocab_size=col._num_buckets)
                embedding_table.add_embed_weights(vocab_name=field,
                                                  vocab_size=col._num_buckets,
                                                  embed_dim=cur_dimension,
                                                  reg=reg)
            for name, col in val_columns.items():
                field = column_to_field.get(name, name)
                cur_dimension = dimension_config[field] if field in dimension_config else embed_dim
                embedding_table.add_linear_weights(vocab_name=name,
                                                   vocab_size=1)
                embedding_table.add_embed_weights(vocab_name=field,
                                                  vocab_size=1,
                                                  embed_dim=cur_dimension,
                                                  reg=reg)

        builder = _LazyBuilder(features)
        # linear part
        linear_outputs = []
        for name, col in cat_columns.items():
            # get sparse tensor of input feature from feature column
            sp_tensor = col._get_sparse_tensors(builder)
            sp_ids = sp_tensor.id_tensor
            linear_weights = embedding_table.get_linear_weights(name)
            # linear_weights: (vocab_size, 1)
            # sp_ids: (batch_size, max_tokens_per_example)
            # sp_values: (batch_size, max_tokens_per_example)
            linear_output = embedding_ops.safe_embedding_lookup_sparse(
                linear_weights,
                sp_ids,
                None,
                combiner='sum',
                name='{}_linear_output'.format(name))
            linear_outputs.append(linear_output)
        for name, col in val_columns.items():
            dense_tensor = col._get_dense_tensor(builder)
            linear_weights = embedding_table.get_linear_weights(name)
            linear_output = tf.multiply(dense_tensor, linear_weights)
            linear_outputs.append(linear_output)
        # linear_outputs: (batch_size, nonzero_feature_num)
        linear_outputs = tf.concat(linear_outputs, axis=1)

        # poly part
        for name, col in cat_columns.items():
            # get sparse tensor of input feature from feature column
            field = column_to_field.get(name, name)
            sp_tensor = col._get_sparse_tensors(builder)
            sp_ids = sp_tensor.id_tensor
            embed_weights = embedding_table.get_embed_weights(field)
            # embeddings: (batch_size, embed_dim)
            # x_i * v_i
            embeddings = embedding_ops.safe_embedding_lookup_sparse(
                embed_weights,
                sp_ids,
                None,
                combiner='sum',
                name='{}_{}_embedding'.format(field, name))
            embedding_dict[field] = embeddings
        for name, col in val_columns.items():
            field = column_to_field.get(name, name)
            dense_tensor = col._get_dense_tensor(builder)
            embed_weights = embedding_table.get_embed_weights(field)
            embeddings = tf.multiply(dense_tensor, embed_weights)
            embedding_dict[field] = embeddings

    with tf.variable_scope("dnn_embed"):
        x = tf.concat(list(embedding_dict.values()), axis=1)
        new_inputs = tf.concat([linear_outputs, x], 1)
        return new_inputs
def build_fm(features, labels, mode, params):
    """model fn"""
    # build embedding tables
    dimension = params['DIMENSION']
    columns = params['FM_COLUMNS']
    column_to_field = params['COLUMN_TO_FIELD']
    embedding_table = EmbeddingTable()
    with tf.variable_scope("fm", reuse=tf.AUTO_REUSE, values=[features]) as scope:
        for name, col in columns.items():
            field = column_to_field.get(name, name)
            embedding_table.add_linear_weights(vocab_name=name,
                                               vocab_size=col._num_buckets)
            embedding_table.add_embed_weights(vocab_name=field,
                                              vocab_size=col._num_buckets,
                                              embed_dim=dimension)
        # bias
        bias = tf.get_variable(name='bias',
                               shape=(1,),
                               initializer=tf.constant_initializer(0.0))
        builder = _LazyBuilder(features)

        # linear part
        linear_outputs = []
        for name, col in columns.items():
            # get sparse tensor of input feature from feature column
            sp_tensor = col._get_sparse_tensors(builder)
            sp_ids = sp_tensor.id_tensor
            linear_weights = embedding_table.get_linear_weights(name)
            # linear_weights: (vocab_size, 1)
            # sp_ids: (batch_size, max_tokens_per_example)
            # sp_values: (batch_size, max_tokens_per_example)
            # output: (batch_size, 1)
            output = embedding_ops.safe_embedding_lookup_sparse(
                linear_weights,
                sp_ids,
                None,
                combiner='sum',
                name='{}_linear_output'.format(name))
            linear_outputs.append(output)
        # linear_outputs: (batch_size, nonzero_feature_num)
        linear_outputs = tf.concat(linear_outputs, axis=1)
        # linear_logits: (batch_size, 1)
        linear_logits = tf.reduce_sum(linear_outputs,
                                      axis=1,
                                      keepdims=True,
                                      name='linear_logits')

        # poly part
        sum_then_square = []
        square_then_sum = []
        for name, col in columns.items():
            # get sparse tensor of input feature from feature column
            field = column_to_field.get(name, name)
            sp_tensor = col._get_sparse_tensors(builder)
            sp_ids = sp_tensor.id_tensor
            embed_weights = embedding_table.get_embed_weights(field)
            # embeddings: (batch_size, embed_dim)
            # x_i * v_i
            embeddings = embedding_ops.safe_embedding_lookup_sparse(
                embed_weights,
                sp_ids,
                None,
                combiner='sum',
                name='{}_{}_embedding'.format(field, name))
            sum_then_square.append(embeddings)
            square_then_sum.append(tf.square(embeddings))
        # sum_then_square: (batch_size, embedding)
        sum_then_square = tf.square(tf.add_n(sum_then_square))
        # square_then_sum: (batch_size, embedding)
        square_then_sum = tf.add_n(square_then_sum)
        # poly_logits: (batch_size, 1)
        poly_logits = 0.5 * tf.reduce_sum(
            tf.subtract(sum_then_square, square_then_sum),
            axis=1,
            keepdims=True)
        # logits: (batch_size, 1)
        logits = linear_logits + poly_logits
        # predictions
        logits = tf.nn.bias_add(logits, bias)
        logistic = tf.nn.sigmoid(logits, name='logistic')
        two_class_logits = tf.concat((tf.zeros_like(logits), logits),
                                     axis=-1,
                                     name='two_class_logits')
        probabilities = tf.nn.softmax(two_class_logits, name='probabilities')
        class_ids = tf.argmax(two_class_logits, axis=-1, name="class_ids")
        class_ids = tf.expand_dims(class_ids, axis=-1)
        predictions = {
            'logits': logits,
            'logistic': logistic,
            'probabilities': probabilities,
            'class_ids': class_ids
        }
        # PREDICT mode
        if mode == tf.estimator.ModeKeys.PREDICT:
            return tf.estimator.EstimatorSpec(mode=mode,
                                              predictions=predictions)

        loss = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(logits=logits,
                                                    labels=labels))
        eval_metric_ops = {'auc': tf.metrics.auc(labels, logistic)}
        # EVAL mode
        if mode == tf.estimator.ModeKeys.EVAL:
            return tf.estimator.EstimatorSpec(mode=mode,
                                              loss=loss,
                                              eval_metric_ops=eval_metric_ops)

        assert mode == tf.estimator.ModeKeys.TRAIN
        optimizer = tf.train.FtrlOptimizer(learning_rate=0.01,
                                           l1_regularization_strength=0.001,
                                           l2_regularization_strength=0.001)
        global_step = tf.train.get_or_create_global_step()
        train_op = optimizer.minimize(loss, global_step=global_step)
        return tf.estimator.EstimatorSpec(mode=mode,
                                          loss=loss,
                                          train_op=train_op,
                                          eval_metric_ops=eval_metric_ops)
def output_logits_from_bi_interaction(features, embedding_table, params):
    field2vocab_mapping = params['field_vocab_mapping']

    # The formula in the paper calls for 'sum'. I also tried 'mean' and
    # 'sqrtn', and both were much worse than 'sum'. Whether that is specific
    # to the Criteo data or 'sum' is required in theory, I am not sure, so
    # an option to specify another combiner is kept.
    combiner = params.get('multi_embed_combiner', 'sum')

    # See Equation (4) of the paper "Neural Factorization Machines for
    # Sparse Predictive Analytics".
    fields_embeddings = []
    fields_squared_embeddings = []
    for fieldname, vocabname in field2vocab_mapping.items():
        sp_ids = features[fieldname + "_ids"]
        sp_values = features[fieldname + "_values"]

        # --------- embedding
        embed_weights = embedding_table.get_embed_weights(vocabname)
        # embedding: [batch_size, embed_dim]
        embedding = embedding_ops.safe_embedding_lookup_sparse(
            embed_weights,
            sp_ids,
            sp_values,
            combiner=combiner,
            name='{}_embedding'.format(fieldname))
        fields_embeddings.append(embedding)

        # --------- square of embedding
        squared_emb_weights = tf.square(embed_weights)
        squared_sp_values = tf.SparseTensor(
            indices=sp_values.indices,
            values=tf.square(sp_values.values),
            dense_shape=sp_values.dense_shape)
        # squared_embedding: [batch_size, embed_dim]
        squared_embedding = embedding_ops.safe_embedding_lookup_sparse(
            squared_emb_weights,
            sp_ids,
            squared_sp_values,
            combiner=combiner,
            name='{}_squared_embedding'.format(fieldname))
        fields_squared_embeddings.append(squared_embedding)

    # calculate bi-interaction
    sum_embedding_then_square = tf.square(
        tf.add_n(fields_embeddings))  # [batch_size, embed_dim]
    square_embedding_then_sum = tf.add_n(
        fields_squared_embeddings)  # [batch_size, embed_dim]
    bi_interaction = 0.5 * (
        sum_embedding_then_square - square_embedding_then_sum
    )  # [batch_size, embed_dim]
    tf.logging.info("bi-interaction, shape={}".format(bi_interaction.shape))

    # calculate logits
    logits = tf.layers.dense(bi_interaction,
                             units=1,
                             use_bias=True,
                             activation=None)

    # The FM and DNN share embeddings, so besides the logits, also return
    # the per-field embeddings to make building the DNN easier.
    return logits, fields_embeddings
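Why both the table and the sparse values are squared for the second lookup: with `combiner='sum'`, looking up the squared table $W^{\odot 2}$ with squared values $x_j^2$ returns $\sum_j x_j^2 W_{i_j}^{\odot 2} = \sum_j (x_j W_{i_j})^{\odot 2}$, which is exactly the "sum of squares" term of the bi-interaction identity:

$$\frac{1}{2}\Big[\Big(\sum_j x_j W_{i_j}\Big)^{\odot 2} - \sum_j \big(x_j W_{i_j}\big)^{\odot 2}\Big] \;=\; \sum_{j<k} \big(x_j W_{i_j}\big) \odot \big(x_k W_{i_k}\big)$$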
def common_minimize_trainable(self, base_opt, test_opt, name):
  base_opt = de.DynamicEmbeddingOptimizer(base_opt)
  test_opt = de.DynamicEmbeddingOptimizer(test_opt)
  id = 0
  config = config_pb2.ConfigProto(
      allow_soft_placement=True,
      gpu_options=config_pb2.GPUOptions(allow_growth=True),
  )
  for (
      num_shards,
      k_dtype,
      d_dtype,
      initial_mode,
      dim,
      run_step,
  ) in itertools.product(
      [1, 2],
      [dtypes.int64],
      [
          dtypes.float32,
      ],
      [
          "constant",
      ],
      [1, 10],
      [10],
  ):
    with self.session(config=config, use_gpu=test_util.is_gpu_available()):
      id += 1
      raw_init_ids = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
      raw_init_vals = [
          [
              x,
          ] * dim
          for x in [0.0, 0.1, 0.3, 0.8, 0.16, 0.25, 0.36, 0.49, 0.64, 0.81]
      ]
      raw_ids = constant_op.constant([1, 3, 3, 9], dtype=k_dtype)
      sp_ids = sparse_tensor.SparseTensor(
          indices=[
              [0, 0],
              [0, 1],
              [1, 0],
              [2, 1],
          ],
          values=raw_ids,
          dense_shape=[3, 2],
      )
      x = constant_op.constant([[_x * dim] for _x in [[0.4], [0.5], [0.6]]],
                               dtype=d_dtype)
      x = array_ops.reshape(x, shape=(3 * dim, 1))
      # base var prepare
      base_var = variables.Variable(
          np.array(raw_init_vals).reshape([len(raw_init_ids), dim]),
          dtype=d_dtype,
          shape=[len(raw_init_ids), dim],
      )
      base_embedding = embedding_ops.safe_embedding_lookup_sparse(
          base_var, sp_ids, None, combiner="sum")
      base_embedding = array_ops.reshape(base_embedding, shape=[1, 3 * dim])
      pred0 = math_ops.matmul(base_embedding, x)
      loss0 = pred0 * pred0
      base_opt_op = base_opt.minimize(loss0, var_list=[base_var])
      # test var prepare
      embeddings = de.get_variable(
          "s6030-" + name + str(id),
          key_dtype=k_dtype,
          value_dtype=d_dtype,
          devices=_get_devices() * num_shards,
          initializer=1.0,
          dim=dim,
      )
      self.device_check(embeddings)
      init_ids = constant_op.constant(raw_init_ids, dtype=k_dtype)
      init_vals = constant_op.constant(raw_init_vals, dtype=d_dtype)
      init_op = embeddings.upsert(init_ids, init_vals)
      self.evaluate(init_op)
      # test branch
      test_var, trainable = de.safe_embedding_lookup_sparse(
          embeddings,
          sp_ids,
          sparse_weights=None,
          combiner="sum",
          return_trainable=True,
      )
      pred1 = math_ops.matmul(
          array_ops.reshape(test_var, shape=[1, 3 * dim]), x)
      loss1 = pred1 * pred1
      test_opt_op = test_opt.minimize(loss1, var_list=[trainable])
      self.evaluate(variables.global_variables_initializer())
      self.assertAllCloseAccordingToType(
          np.array(raw_init_vals).reshape([len(raw_init_ids), dim]),
          self.evaluate(base_var),
      )
      # run base
      for _ in range(run_step):
        self.evaluate(base_opt_op)
      # Run `run_step` steps of sgd
      for _ in range(run_step):
        self.evaluate(test_opt_op)
      table_var = array_ops.reshape(embeddings.lookup(init_ids),
                                    shape=[10, dim])
      # Validate updated params
      self.assertAllCloseAccordingToType(
          self.evaluate(base_var),
          self.evaluate(table_var),
          msg="Cond:{},{},{},{},{}".format(num_shards, k_dtype, d_dtype, dim,
                                           run_step),
      )
def common_minimize_trainable(self, base_opt, test_opt, name):
  if test_util.is_gpu_available():
    keys_type_list = [dtypes.int64]
  else:
    keys_type_list = [dtypes.int64, dtypes.string]
  deo.enable_train_mode()
  config = config_pb2.ConfigProto(
      allow_soft_placement=True,
      gpu_options=config_pb2.GPUOptions(allow_growth=True))
  for run_id, num_shards, k_dtype, d_dtype, initial_mode, dim, run_step \
      in _next_run_step_config(keys_type_list):
    with self.session(config=config, use_gpu=test_util.is_gpu_available()):
      raw_init_ids = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
      if k_dtype == dtypes.string:
        raw_init_ids = [str(i) for i in raw_init_ids]
      raw_init_vals = [
          [
              x,
          ] * dim
          for x in [0.0, 0.1, 0.3, 0.8, 0.16, 0.25, 0.36, 0.49, 0.64, 0.81]
      ]
      raw_ids_py = [1, 3, 3, 9]
      raw_ids_nn = constant_op.constant(raw_ids_py, dtype=dtypes.int64)
      sp_ids_nn = sparse_tensor.SparseTensor(indices=[
          [0, 0],
          [0, 1],
          [1, 0],
          [2, 1],
      ],
                                             values=raw_ids_nn,
                                             dense_shape=[3, 2])
      if k_dtype != dtypes.string:
        raw_ids_de = raw_ids_nn
      else:
        raw_ids_de = constant_op.constant([str(i) for i in raw_ids_py],
                                          dtype=k_dtype)
      sp_ids_de = sparse_tensor.SparseTensor(indices=[
          [0, 0],
          [0, 1],
          [1, 0],
          [2, 1],
      ],
                                             values=raw_ids_de,
                                             dense_shape=[3, 2])
      x = constant_op.constant([[_x * dim] for _x in [[0.4], [0.5], [0.6]]],
                               dtype=d_dtype)
      x = array_ops.reshape(x, shape=(3 * dim, 1))
      # base var prepare
      base_var = variables.Variable(np.array(raw_init_vals).reshape(
          [len(raw_init_ids), dim]),
                                    dtype=d_dtype,
                                    shape=[len(raw_init_ids), dim])
      base_embedding = embedding_ops.safe_embedding_lookup_sparse(
          base_var, sp_ids_nn, None, combiner='sum')
      base_embedding = array_ops.reshape(base_embedding, shape=[1, 3 * dim])
      pred0 = math_ops.matmul(base_embedding, x)
      loss0 = pred0 * pred0
      base_opt_op = base_opt.minimize(loss0, var_list=[base_var])
      # test var prepare
      embeddings = deo.get_variable('s6030-' + name + str(run_id),
                                    key_dtype=k_dtype,
                                    value_dtype=d_dtype,
                                    devices=_get_devices() * num_shards,
                                    initializer=1.,
                                    dim=dim)
      self.device_check(embeddings)
      init_ids = constant_op.constant(raw_init_ids, dtype=k_dtype)
      init_vals = constant_op.constant(raw_init_vals, dtype=d_dtype)
      init_op = embeddings.upsert(init_ids, init_vals)
      self.evaluate(init_op)
      # test branch
      test_var, trainable = deo.safe_embedding_lookup_sparse(
          embeddings,
          sp_ids_de,
          sparse_weights=None,
          combiner="sum",
          return_trainable=True)
      pred1 = math_ops.matmul(
          array_ops.reshape(test_var, shape=[1, 3 * dim]), x)
      loss1 = pred1 * pred1
      test_opt_op = test_opt.minimize(loss1, var_list=[trainable])
      self.evaluate(variables.global_variables_initializer())
      self.assertAllCloseAccordingToType(
          np.array(raw_init_vals).reshape([len(raw_init_ids), dim]),
          self.evaluate(base_var))
      # run base
      for _ in range(run_step):
        self.evaluate(base_opt_op)
      # Run `run_step` steps of sgd
      for _ in range(run_step):
        self.evaluate(test_opt_op)
      table_var = array_ops.reshape(embeddings.lookup(init_ids),
                                    shape=[10, dim])
      # Validate updated params
      self.assertAllCloseAccordingToType(
          self.evaluate(base_var),
          self.evaluate(table_var),
          msg="Cond:{},{},{},{},{}".format(num_shards, k_dtype, d_dtype, dim,
                                           run_step))