def __init__(self, in_dim, out_dim, normalize=True, bias=False, name='',
             ps_num=0):
    """Create the layer's `weights` variable and, optionally, a bias.

    Args:
        in_dim: first dimension of the `weights` variable.
        out_dim: second dimension of `weights`; also the bias length.
        normalize: stored on the instance; not used by this constructor.
        bias: when truthy, a zero-initialised bias variable is created.
        name: prefix of the variable scope (`<name>/layer`).
        ps_num: when truthy, used as `max_partitions` of the min-max
            partitioner installed on the variable scope.
    """
    self._in_dim, self._out_dim = in_dim, out_dim
    self._normalize = normalize
    self._bias = bias
    self._name = name

    # No partitioner unless a non-zero parameter-server count is given.
    self._partitioner = (
        tf.min_max_variable_partitioner(max_partitions=ps_num,
                                        min_slice_size=DNN_PT_SIZE)
        if ps_num else None)

    self._vars = {}
    scope_name = self._name + '/' + 'layer'
    with tf.variable_scope(scope_name, reuse=tf.AUTO_REUSE,
                           partitioner=self._partitioner):
        self._vars['weights'] = tf.get_variable(
            name='weights', shape=[self._in_dim, self._out_dim])
        if self._bias:
            # NOTE(review): created with tf.Variable rather than
            # tf.get_variable, so it presumably bypasses the scope's
            # reuse/partitioner settings -- confirm this is intended.
            initial_bias = tf.zeros([self._out_dim], dtype=tf.float32)
            self._vars['bias'] = tf.Variable(initial_bias, name='bias')
def __init__(self, emb_dim, is_train, train_dropout=1.0, input_dim=None,
             embeddings=None, scope="embeddings", use_tanh=False,
             num_ps_tasks=None):
    """Set up the embedding table, reusing a supplied one when given.

    Args:
        emb_dim: width of each embedding row.
        is_train: stored on the instance as `is_train`.
        train_dropout: stored on the instance as `dropout`.
        input_dim: number of rows of the table; only read when a new
            table is created.
        embeddings: an existing embedding table to use instead of
            creating one. `None` means "create a new variable".
        scope: variable scope name, also passed to the summary helper.
        use_tanh: stored on the instance as `use_tanh`.
        num_ps_tasks: when truthy, the new table is partitioned with at
            most this many partitions.
    """
    super(EmbeddingLookup, self).__init__()
    self.emb_dim = emb_dim
    self.is_train = is_train
    self.dropout = train_dropout
    self.use_tanh = use_tanh

    # Compare against None explicitly: truth-testing a tf.Tensor raises
    # TypeError in graph mode, so `if embeddings:` cannot be used to detect
    # a caller-supplied table.
    create_table = embeddings is None

    with tf.variable_scope(scope):
        if not create_table:
            self.embeddings = embeddings
        else:
            partitioner = None
            if num_ps_tasks:
                partitioner = tf.min_max_variable_partitioner(
                    max_partitions=num_ps_tasks)
            self.embeddings = tf.get_variable(
                "embeddings",
                shape=(input_dim, self.emb_dim),
                initializer=tf.glorot_uniform_initializer(),
                partitioner=partitioner)

    # Summaries are only attached to tables created here.
    # NOTE(review): the original nesting was lost; this call is assumed to
    # sit outside the variable scope -- confirm.
    if create_table:
        utils.add_variable_summaries(self.embeddings, scope)
def __init__(self, input_dim, output_dim, neigh_input_dim=None, bias=False,
             act=tf.nn.relu, name=None, ps_num=None):
    """Create the aggregator's variables under `<name>_vars`.

    Args:
        input_dim: stored as `input_dim`; also the fallback for
            `neigh_input_dim`.
        output_dim: second dimension of `weights`; also the bias length.
        neigh_input_dim: first dimension of `weights`; defaults to
            `input_dim` when None.
        bias: when truthy, a bias variable is created via `zeros`.
        act: activation stored on the instance.
        name: scope prefix; defaults to 'gcn_agg'.
        ps_num: when not None, used as `max_partitions` of the min-max
            partitioner installed on the variable scope.
    """
    self.name = 'gcn_agg' if name is None else name
    self.bias = bias
    self.act = act

    if neigh_input_dim is None:
        neigh_input_dim = input_dim
    self.neigh_input_dim = neigh_input_dim
    self.output_dim = output_dim
    self.input_dim = input_dim

    self.ps_num = ps_num
    self.partitioner = None
    if self.ps_num is not None:
        self.partitioner = tf.min_max_variable_partitioner(
            max_partitions=self.ps_num, min_slice_size=DNN_PT_SIZE)

    self.vars = {}
    with tf.variable_scope(self.name + '_vars', reuse=tf.AUTO_REUSE,
                           partitioner=self.partitioner):
        # `glorot` and `zeros` are helpers defined outside this block.
        self.vars['weights'] = glorot([neigh_input_dim, output_dim],
                                      name='weights')
        if self.bias:
            self.vars['bias'] = zeros([output_dim], name='bias')
def __init__(self, index, input_dim, output_dim=None, neigh_input_dim=None,
             ps_num=0, bias=False, act=tf.nn.relu, name=''):
    """Record the layer configuration and build its partitioner.

    No variables are created here; only attributes are set.

    Args:
        index: stored as `_index`.
        input_dim: stored as `_input_dim`; fallback for the two dims below.
        output_dim: stored as `_output_dim`; defaults to `input_dim`.
        neigh_input_dim: stored as `_neigh_input_dim`; defaults to
            `input_dim`.
        ps_num: when truthy, used as `max_partitions` of a min-max
            partitioner stored as `_partitioner` (otherwise None).
        bias: stored as `_bias`.
        act: stored as `_act`.
        name: stored as `_name`.
    """
    self._index = index
    self._name = name
    self._bias = bias
    self._act = act
    self._vars = {}

    self._input_dim = input_dim
    self._neigh_input_dim = (
        input_dim if neigh_input_dim is None else neigh_input_dim)
    self._output_dim = input_dim if output_dim is None else output_dim

    self._partitioner = (
        tf.min_max_variable_partitioner(max_partitions=ps_num,
                                        min_slice_size=DNN_PT_SIZE)
        if ps_num else None)
def __init__(self, name, FLAGS, ps_num=None):
    """Create the partitioned embedding and bias lookup tables.

    Args:
        name: prefix for the two variable scopes.
        FLAGS: flag object; `node_count`, `dim`, `s2h` and `ps_hosts`
            are read from it.
        ps_num: number of partitions to use when `FLAGS.ps_hosts` is
            empty. When both are falsy a single partition is used.
    """
    self.node_size = FLAGS.node_count
    self.emb_size = FLAGS.dim
    self.FLAGS = FLAGS
    self.name = name
    self.s2h = FLAGS.s2h

    if FLAGS.ps_hosts:
        # One partition per comma-separated parameter-server host.
        self.ps_num = len(FLAGS.ps_hosts.split(","))
    elif ps_num:
        self.ps_num = ps_num
    else:
        # Previously `self.ps_num` was left unset here and the partitioner
        # construction below failed with AttributeError.
        self.ps_num = 1

    emb_partitioner = tf.min_max_variable_partitioner(
        max_partitions=self.ps_num, min_slice_size=EMB_PT_SIZE)

    with tf.variable_scope(self.name + '_item_target_embedding',
                           reuse=tf.AUTO_REUSE,
                           partitioner=emb_partitioner):
        self.emb_table = tf.get_variable(
            "emb_lookup_table",
            [self.node_size, self.emb_size],
            initializer=get_unit_emb_initializer(self.emb_size),
            partitioner=emb_partitioner)

    with tf.variable_scope(self.name + '_item_target_bias',
                           reuse=tf.AUTO_REUSE,
                           partitioner=emb_partitioner):
        # Zero-initialised and excluded from training.
        self.bias_table = tf.get_variable(
            "bias_lookup_table",
            [self.node_size],
            partitioner=emb_partitioner,
            initializer=tf.zeros_initializer(),
            trainable=False)
def __init__(self, input_dim, output_dim, neigh_input_dim=None,
             is_training=True, bias=False, act=tf.nn.relu, name=None,
             ps_num=None):
    """Create the mean aggregator's variables under `<name>_vars`.

    Args:
        input_dim: stored as `input_dim`; fallback for `neigh_input_dim`.
        output_dim: second dimension of `weights`; also the bias length.
        neigh_input_dim: defaults to `input_dim`; `weights` has
            `input_dim + neigh_input_dim` rows.
        is_training: accepted but not read by this constructor.
        bias: when truthy, a bias variable is created via `zeros`.
        act: activation stored on the instance.
        name: scope prefix; defaults to 'mean_agg'.
        ps_num: when not None, used as `max_partitions` of the min-max
            partitioner installed on the variable scope.
    """
    self.name = 'mean_agg' if name is None else name
    print('init mean aggregator')
    print('name=', self.name)

    self.bias = bias
    self.act = act
    self.vars = {}

    if neigh_input_dim is None:
        neigh_input_dim = input_dim
    self.neigh_input_dim = neigh_input_dim
    self.output_dim = output_dim
    self.input_dim = input_dim

    self.ps_num = ps_num
    self.partitioner = None
    if self.ps_num is not None:
        self.partitioner = tf.min_max_variable_partitioner(
            max_partitions=self.ps_num, min_slice_size=DNN_PT_SIZE)

    with tf.variable_scope(self.name + '_vars', reuse=tf.AUTO_REUSE,
                           partitioner=self.partitioner):
        # `glorot` and `zeros` are helpers defined outside this block.
        weight_rows = input_dim + neigh_input_dim
        self.vars['weights'] = glorot([weight_rows, output_dim],
                                      name='weights')
        if self.bias:
            self.vars['bias'] = zeros([output_dim], name='bias')
def full_connect_sparse(self, train_inputs, weights_shape, sp_weights,
                        scope_name):
    """Look up and mean-combine sparse embeddings from a `weights` table.

    Args:
        train_inputs: passed as `sp_ids` to `tf.nn.embedding_lookup_sparse`.
        weights_shape: shape of the `weights` variable.
        sp_weights: passed as `sp_weights` to the sparse lookup.
        scope_name: key under which the variable is recorded in
            `self.weights_map`.

    Returns:
        The result of `tf.nn.embedding_lookup_sparse` with
        `combiner="mean"`. No bias is added.
    """
    # Partition only when there is more than one parameter server.
    partitioner = None
    if self.ps_num > 1:
        partitioner = tf.min_max_variable_partitioner(
            max_partitions=self.ps_num)

    # Both code paths now read `config.embedding_stddev`; the single-PS path
    # previously read `config.embedding.stddev`, which did not match.
    weights = tf.get_variable(
        "weights",
        weights_shape,
        initializer=self.get_initializer(
            stddev=self.config.embedding_stddev),
        partitioner=partitioner)
    self.weights_map[scope_name] = weights

    sample_embedding = tf.nn.embedding_lookup_sparse(
        weights, sp_ids=train_inputs, sp_weights=sp_weights,
        combiner="mean")
    return sample_embedding
def wide_model(numeric_input, category_input, vocabs):
    """Build the wide part of the model and return its logits.

    Each categorical column gets its own embedding variable of width 8.
    The looked-up embeddings are concatenated along axis 1, joined with
    `numeric_input`, and multiplied by a weight matrix `W` that has a
    single output column.

    Reads `n_pss` and `task_index`, which are not defined in this function
    (presumably module-level settings -- verify).

    Args:
        numeric_input: tensor concatenated with the embeddings along
            axis 1 (assumed 2-D -- TODO confirm).
        category_input: tensor of category ids; it is transposed so that
            row `i` supplies the ids for `vocabs[i]`.
        vocabs: sequence of vocabulary sizes, one per categorical column.

    Returns:
        The result of the final matmul.
    """
    # NOTE(review): statement nesting was reconstructed from a source whose
    # whitespace had been collapsed -- confirm the extent of the device
    # block and the position of the seed call.
    transpose_category_input = tf.transpose(category_input)
    category_sum = None
    # Append each embedded category column to category_sum.
    for i in range(0, len(vocabs)):
        embedding = tf.get_variable("wideem" + str(i), [vocabs[i], 8],
                                    initializer=tf.contrib.layers.xavier_initializer(),
                                    #partitioner=tf.fixed_size_partitioner(n_pss))
                                    partitioner=tf.min_max_variable_partitioner(n_pss, 0, 2 << 10))
        # Pick one column from category input
        col = tf.nn.embedding_lookup(transpose_category_input, [i])[0]
        # Materialise the (possibly partitioned) table on this worker.
        with tf.device("/job:worker/task:" + str(task_index)):
            embedding = tf.identity(embedding)
        # Equivalent to a one-hot product: [0001] * [w1, w2, w3, w4] selects w4.
        embedded_col = tf.nn.embedding_lookup(embedding, col)  # number * embedding output number
        if category_sum is None:
            category_sum = embedded_col
        else:
            category_sum = tf.concat([category_sum, embedded_col], 1)
    tf.set_random_seed(1)
    w = tf.get_variable("W", [numeric_input.shape[1] + category_sum.shape[1], 1],
                        initializer=tf.contrib.layers.xavier_initializer())
    wmodel_logits_sum = tf.matmul(tf.concat([numeric_input, category_sum], 1), w)
    return wmodel_logits_sum
def _partitioner(self):
    """Build a min-max partitioner sized by the cluster's "ps" job.

    The cluster layout is read from the `CLUSTER_SPEC` environment
    variable, which must hold a JSON document accepted by
    `tf.train.ClusterSpec`.

    Returns:
        A partitioner with one partition per "ps" task, or None when
        `CLUSTER_SPEC` is unset/empty or the cluster has no "ps" job.
    """
    cluster_spec = os.environ.get("CLUSTER_SPEC")
    if not cluster_spec:
        return None

    cluster = tf.train.ClusterSpec(json.loads(cluster_spec))
    # `num_tasks` raises ValueError for a job the cluster does not define,
    # so a worker-only cluster is treated as "no partitioning".
    if "ps" not in cluster.jobs:
        return None

    return tf.min_max_variable_partitioner(
        max_partitions=cluster.num_tasks("ps"),
        min_slice_size=conf.emb_min_slice_size)
def testInitFromPartitionVar(self):
    """Restore a partitioned variable from a checkpoint in two ways.

    The assignment map first targets the variable by its scoped name and
    then by its list of partition variables; both must reproduce the
    values that were saved.
    """
    checkpoint_dir = self.get_temp_dir()
    with self.test_session() as session:
        expected = _create_partition_checkpoints(session, checkpoint_dir)

    def restore_and_check(make_assignment_map):
        # Every restore runs in a fresh graph and session.
        with tf.Graph().as_default() as graph:
            with self.test_session(graph=graph) as sess:
                with tf.variable_scope("some_scope"):
                    restored = tf.get_variable(
                        name="my1",
                        shape=[100, 100],
                        initializer=tf.truncated_normal_initializer(0.5),
                        partitioner=tf.min_max_variable_partitioner(
                            max_partitions=5, axis=0,
                            min_slice_size=8 << 10))
                    shards = restored._get_variable_list()
                tf.contrib.framework.init_from_checkpoint(
                    checkpoint_dir, make_assignment_map(shards))
                sess.run(tf.global_variables_initializer())
                self.assertAllEqual(sess.run(shards), expected)

    # Target given as a scoped variable name.
    restore_and_check(lambda shards: {"var1": "some_scope/my1"})
    # Target given as the list of partition variables.
    restore_and_check(lambda shards: {"var1": shards})
def testInitFromPartitionVar(self):
    """Check checkpoint restoration into a partitioned variable.

    Two assignment-map forms are exercised: the scoped variable name and
    the explicit list of partition variables. Each must yield the values
    written by `_create_partition_checkpoints`.
    """
    checkpoint_dir = self.get_temp_dir()
    with self.test_session() as session:
        saved_values = _create_partition_checkpoints(session, checkpoint_dir)

    def check_restore(assignment_for):
        # Build the variable in a brand-new graph and session.
        with tf.Graph().as_default() as fresh_graph:
            with self.test_session(graph=fresh_graph) as sess:
                with tf.variable_scope("some_scope"):
                    target = tf.get_variable(
                        name="my1",
                        shape=[100, 100],
                        initializer=tf.truncated_normal_initializer(0.5),
                        partitioner=tf.min_max_variable_partitioner(
                            max_partitions=5, axis=0,
                            min_slice_size=8 << 10))
                    parts = target._get_variable_list()
                tf.contrib.framework.init_from_checkpoint(
                    checkpoint_dir, assignment_for(parts))
                sess.run(tf.initialize_all_variables())
                restored_values = sess.run(parts)
                self.assertAllEqual(restored_values, saved_values)

    check_restore(lambda parts: {"var1": "some_scope/my1"})
    check_restore(lambda parts: {"var1": parts})
def _build_linear_logits(features, linear_feature_columns, output_rank,
                         num_ps_replicas, input_layer_min_slice_size,
                         joint_linear_weights=False):
    """Build the linear side of the model and return its logits.

    As a side effect, `WideAndDeepAlgorithm._LINEAR_LEARNING_RATE` is
    replaced by the value `_linear_learning_rate` computes from it and the
    number of linear feature columns.

    Args:
        features: dict of tensors containing all features.
        linear_feature_columns: collection of feature columns that make
            up the linear side.
        output_rank: passed as `num_outputs` to the weighted-sum helper.
        num_ps_replicas: number of PS replicas; upper bound on partitions.
        input_layer_min_slice_size: minimum slice size of the input layer.
        joint_linear_weights: when True the logits come from
            `tf.contrib.layers.joint_weighted_sum_from_feature_columns`,
            otherwise from
            `tf.contrib.layers.weighted_sum_from_feature_columns`.

    Returns:
        The linear-side logits tensor, or None when there are no linear
        feature columns.
    """
    if not linear_feature_columns or len(linear_feature_columns) == 0:
        return None

    algo = WideAndDeepAlgorithm
    algo._LINEAR_LEARNING_RATE = algo._linear_learning_rate(
        algo._LINEAR_LEARNING_RATE, len(linear_feature_columns))

    scope_name = algo.LINEAR_SCOPE_NAME
    partitioner = tf.min_max_variable_partitioner(
        max_partitions=num_ps_replicas,
        min_slice_size=input_layer_min_slice_size)

    # Both helpers take the same keyword arguments, so pick one up front.
    if joint_linear_weights:
        weighted_sum = (
            tf.contrib.layers.joint_weighted_sum_from_feature_columns)
    else:
        weighted_sum = tf.contrib.layers.weighted_sum_from_feature_columns

    with tf.variable_scope(scope_name,
                           values=tuple(six.itervalues(features)),
                           partitioner=partitioner) as scope:
        linear_logits, _, _ = weighted_sum(
            columns_to_tensors=features,
            feature_columns=linear_feature_columns,
            num_outputs=output_rank,
            weight_collections=[scope_name],
            scope=scope)
    return linear_logits
def _testMinMaxVariablePartitioner(self, max_partitions, axis,
                                   min_slice_size, var_name, var_shape,
                                   expected_axis_shards,
                                   expected_partitions):
    """Create one float32 variable under a min-max partitioner and verify it.

    Args:
        max_partitions, axis, min_slice_size: forwarded to
            `tf.min_max_variable_partitioner`.
        var_name: name of the variable created in scope "root".
        var_shape: shape of that variable.
        expected_axis_shards: expected number of partition variables.
        expected_partitions: expected value of `_get_partitions()`.
    """
    with tf.variable_scope(
            "root",
            partitioner=tf.min_max_variable_partitioner(
                max_partitions=max_partitions,
                axis=axis,
                min_slice_size=min_slice_size)):
        variable = tf.get_variable(var_name, dtype=tf.float32,
                                   shape=var_shape)

    shards = variable._get_variable_list()
    layout = variable._get_partitions()
    self.assertEqual(len(shards), expected_axis_shards)
    self.assertAllEqual(layout, expected_partitions)
def _EmbeddingParamsAsPartitionedVariable(num_shards, vocab_size,
                                          dtype=tf.float32, shape=None):
    """Wrap the values from `_EmbeddingParams` in one partitioned variable.

    Args:
        num_shards: forwarded to `_EmbeddingParams`; also the maximum
            number of partitions of the created variable.
        vocab_size: forwarded to `_EmbeddingParams`; first dimension of
            the created variable.
        dtype: forwarded to `_EmbeddingParams`.
        shape: forwarded to `_EmbeddingParams`; trailing dimensions of the
            created variable, `[10]` when falsy.

    Returns:
        Tuple `(p, partitioned_variable, params, feed_dict)` where `p`,
        `params` and `feed_dict` come unchanged from `_EmbeddingParams`.
    """
    p, params, feed_dict = _EmbeddingParams(
        num_shards, vocab_size, dtype=dtype, shape=shape)
    shape = shape or [10]

    # The initial value is the per-shard values joined along axis 0.
    # Keyword arguments avoid the pre-1.0 `tf.concat(axis, values)` order,
    # which the `tf.concat(values, axis)` API rejects.
    shard_values = [params[p_i.name] for p_i in p]
    partitioned_variable = tf.get_variable(
        "p",
        shape=[vocab_size] + shape,
        initializer=tf.concat(values=shard_values, axis=0),
        partitioner=tf.min_max_variable_partitioner(
            max_partitions=num_shards, min_slice_size=1))
    return p, partitioned_variable, params, feed_dict
def __init__(self, categorical_features_dict, total_feature_num, output_dim,
             use_input_bn=True, act=None, need_dense=True, ps_hosts=None,
             name='', is_training=True, multivalent_cols_num=0):
    """Create one coalesced embedding table plus per-feature sparse tables.

    The parsed feature config yields `(idx, attr_name, max_num, emb_dim)`
    entries. The last `multivalent_cols_num` entries are treated as
    multivalent features and each gets its own table. All remaining
    entries share a single table, with a per-feature row offset recorded
    in `self._offsets`.

    Args:
        categorical_features_dict: input to `_parse_feature_config`.
        total_feature_num: stored as `_feature_num`.
        output_dim: stored as `_output_dim`.
        use_input_bn: stored as `_use_input_bn`.
        act: stored as `_act`.
        need_dense: stored as `_need_dense`.
        ps_hosts: comma-separated host list; when truthy the tables are
            partitioned with at most one partition per host.
        name: prefix of the variable scope.
        is_training: stored as `_is_training`.
        multivalent_cols_num: number of trailing multivalent features.
    """
    self._output_dim = output_dim
    self._feature_num = total_feature_num
    self._act = act
    self._use_input_bn = use_input_bn
    self._need_dense = need_dense
    self._name = name
    self._is_training = is_training

    parsed_features = self._parse_feature_config(categorical_features_dict)
    if multivalent_cols_num > 0:
        self._multivalent_features = parsed_features[-multivalent_cols_num:]
        self._categorical_features = parsed_features[:-multivalent_cols_num]
    else:
        self._multivalent_features = []
        self._categorical_features = parsed_features

    partitioner = None
    if ps_hosts:
        host_count = len(ps_hosts.split(","))
        partitioner = tf.min_max_variable_partitioner(
            max_partitions=host_count, min_slice_size=EMB_PT_SIZE)

    self._emb_table = {}
    self._offsets = {}
    with tf.variable_scope(self._name + 'feature_emb_table',
                           reuse=tf.AUTO_REUSE):
        if self._categorical_features:
            next_offset = 0
            self.emb_dim = 16
            for idx, _, max_num, emb_dim in self._categorical_features:
                # The last feature's width wins; the shared table has a
                # single width for all features.
                self.emb_dim = emb_dim
                self._offsets[idx] = tf.constant(next_offset)
                next_offset += max_num
            self._emb_table["coalesced_embed"] = tf.get_variable(
                "emb_lookup_table_",
                [next_offset, self.emb_dim],
                partitioner=partitioner)

        if self._multivalent_features:
            for _, attr_name, max_num, emb_dim in self._multivalent_features:
                self._emb_table[attr_name] = tf.get_variable(
                    "emb_lookup_sparse_table_" + attr_name,
                    [max_num, emb_dim],
                    partitioner=partitioner)
def _create_partition_checkpoints(sess, checkpoint_dir):
    """Save a partitioned 100x100 variable and return its initial values.

    Args:
        sess: session used to initialise, read and save the variable.
        checkpoint_dir: directory receiving the checkpoint (prefix "model").

    Returns:
        The evaluated partition variables of "var1", as returned by
        `sess.run` on the variable list.
    """
    partitioner = tf.min_max_variable_partitioner(
        max_partitions=5, axis=0, min_slice_size=8 << 10)
    variable = tf.get_variable(
        name="var1",
        shape=[100, 100],
        initializer=tf.truncated_normal_initializer(0.5),
        partitioner=partitioner,
    )
    sess.run(tf.initialize_all_variables())
    saved_values = sess.run(variable._get_variable_list())

    tf.train.Saver().save(
        sess,
        os.path.join(checkpoint_dir, "model"),
        global_step=0,
        latest_filename="checkpoint")
    return saved_values
def encode(self, x_categorical=None, x_continuous=None, ids=None):
    """Encode id, categorical and continuous inputs into one dense tensor.

    The enabled inputs are embedded (ids, categorical) or optionally
    batch-normalised (continuous), concatenated along the last axis, and
    passed through the dense layers configured in `self.dense_dims`.

    Args:
        x_categorical: [K_1, K_2, ..., n_categoricals]
        x_continuous: [K_1, K_2, ..., n_continuous]
        ids: [K_1, K_2, ...]

    Returns:
        The output of the last dense layer (after dropout when
        `self.dropout` is truthy).
    """
    # NOTE(review): statement nesting was reconstructed from a source whose
    # whitespace had been collapsed -- confirm in particular whether the
    # dropout is applied once after all dense layers or after each one.
    partitioner = None
    if self.ps_num is not None:
        partitioner = tf.min_max_variable_partitioner(
            max_partitions=self.ps_num, min_slice_size=DNN_PT_SIZE)
    with tf.variable_scope('encoding_' + self.encoder_name,
                           reuse=tf.AUTO_REUSE,
                           partitioner=partitioner) as scope:
        # Pieces to be concatenated along the last axis.
        x_batch = []
        if self.encode_id_num > 0 and self.encode_id_dim > 0:
            assert ids is not None
            # id_idx = tf.string_to_hash_bucket_fast(ids, self.encode_id_num, name='id_to_hash_idx')
            id_emb = tf.nn.embedding_lookup(self.emb_table['id'], ids,
                                            name='embedding_lookup_id')
            x_batch.append(id_emb)
        if self.categorial_features is not None and len(self.categorial_features) > 0:
            assert x_categorical is not None
            to_concats_cat = []
            # Feature i is read from the last axis of x_categorical and
            # looked up in its own table.
            for i, _ in enumerate(self.categorial_features):
                emb = tf.nn.embedding_lookup(
                    self.emb_table[i],
                    x_categorical[..., i],
                    name='embedding_lookup_{}_{}'.format(self.encoder_name, i))
                to_concats_cat.append(emb)
            x_categorical = tf.concat(to_concats_cat, axis=-1, name='cate_concat')
            x_batch.append(x_categorical)
        if self.continuous_features is not None and self.continuous_features > 0:
            assert x_continuous is not None
            if self.use_input_bn:
                x_continuous = tf.layers.batch_normalization(x_continuous,
                                                             training=self.is_training,
                                                             name='input_bn')
            x_batch.append(x_continuous)
        # A single piece needs no concat; an empty list raises IndexError.
        if len(x_batch) > 1:
            x_batch = tf.concat(x_batch, axis=-1, name='concat_cat_n_cont')
        else:
            x_batch = x_batch[0]
        for i, dense_dim in enumerate(self.dense_dims):
            x_batch = tf.layers.dense(x_batch, dense_dim, activation=self.act,
                                      name='dense_{}'.format(i))
        if self.dropout:
            x_batch = tf.layers.dropout(x_batch, rate=self.dropout,
                                        training=self.is_training,
                                        name='dropout')
    return x_batch
def embedding_lookup(input_ids,
                     vocab_size,
                     embedding_size=128,
                     initializer_range=0.02,
                     word_embedding_name="word_embeddings",
                     use_one_hot_embeddings=False,
                     max_partitions=12,
                     min_slice_size=4 << 20):
    """Looks up word embeddings for an id tensor.

    Args:
        input_ids: int32 Tensor of shape [batch_size, seq_length] containing
            word ids.
        vocab_size: int. Size of the embedding vocabulary.
        embedding_size: int. Width of the word embeddings.
        initializer_range: float. Embedding initialization range.
        word_embedding_name: string. Name of the embedding table.
        use_one_hot_embeddings: bool. If True, use the one-hot method for
            word embeddings. If False, use `tf.nn.embedding_lookup()`.
            One hot is better for TPUs.
        max_partitions: int. Upper bound on the number of partitions of the
            embedding table.
        min_slice_size: int. Minimum size of one partition, as understood by
            `tf.min_max_variable_partitioner`.

    Returns:
        A tuple `(output, embedding_table)`: `output` is a float Tensor of
        shape [batch_size, seq_length, embedding_size] for 2-D `input_ids`,
        and `embedding_table` is the (partitioned) embedding variable.
    """
    # This function assumes that the input is of shape [batch_size,
    # seq_length, num_inputs].
    #
    # If the input is a 2D tensor of shape [batch_size, seq_length], we
    # reshape to [batch_size, seq_length, 1].
    if input_ids.shape.ndims == 2:
        input_ids = tf.expand_dims(input_ids, axis=[-1])

    embedding_table = tf.get_variable(
        name=word_embedding_name,
        shape=[vocab_size, embedding_size],
        initializer=create_initializer(initializer_range),
        partitioner=tf.min_max_variable_partitioner(
            max_partitions=max_partitions, min_slice_size=min_slice_size))

    if use_one_hot_embeddings:
        flat_input_ids = tf.reshape(input_ids, [-1])
        one_hot_input_ids = tf.one_hot(flat_input_ids, depth=vocab_size)
        output = tf.matmul(one_hot_input_ids, embedding_table)
    else:
        output = tf.nn.embedding_lookup(embedding_table, input_ids)

    input_shape = get_shape_list(input_ids)

    # Fold the trailing `num_inputs` axis into the embedding axis.
    output = tf.reshape(
        output, input_shape[0:-1] + [input_shape[-1] * embedding_size])
    return (output, embedding_table)
def _create_partition_checkpoints(sess, checkpoint_dir):
    """Write a checkpoint holding one partitioned variable, "var1".

    Args:
        sess: session used to initialise, evaluate and save the variable.
        checkpoint_dir: directory in which the checkpoint is written.

    Returns:
        The values of the partition variables of "var1" at save time.
    """
    var1 = tf.get_variable(
        name="var1",
        shape=[100, 100],
        initializer=tf.truncated_normal_initializer(0.5),
        partitioner=tf.min_max_variable_partitioner(
            max_partitions=5, axis=0, min_slice_size=8 << 10))
    sess.run(tf.initialize_all_variables())
    partition_values = sess.run(var1._get_variable_list())

    prefix = os.path.join(checkpoint_dir, "model")
    saver = tf.train.Saver()
    saver.save(sess, prefix, global_step=0, latest_filename="checkpoint")
    return partition_values
def build_subnetwork(self, features, logits_dimension, training,
                     iteration_step, summary, previous_ensemble=None):
    """Build a one-hidden-layer subnetwork under the "dnn" scope.

    Args:
        features: dict of tensors; only `features["x"]` is read.
        logits_dimension: number of units of the logits layer.
        training: unused here.
        iteration_step: unused here.
        summary: object on which one scalar summary is recorded.
        previous_ensemble: when truthy, the hidden layer of its last
            weighted subnetwork is concatenated to the new hidden layer
            and the seed is incremented.

    Returns:
        A `Subnetwork` whose last layer and logits are the same tensor.
    """
    # Increment seed so different iterations don't learn the exact same thing.
    seed = self._seed + 1 if previous_ensemble else self._seed

    ps_replicas = self._config.num_ps_replicas if self._config else 0
    with tf.variable_scope(
            "dnn",
            partitioner=tf.min_max_variable_partitioner(
                max_partitions=ps_replicas)):
        with tf.variable_scope("hidden_layer"):
            weight = tf.get_variable(
                name="weight",
                shape=[2, self._layer_size],
                initializer=tf.glorot_uniform_initializer(seed=seed))
            hidden = tf.matmul(features["x"], weight)

        if previous_ensemble:
            last_subnetwork = previous_ensemble.weighted_subnetworks[-1]
            previous_hidden = last_subnetwork.subnetwork.shared[
                "hidden_layer"]
            hidden = tf.concat([hidden, previous_hidden], axis=1)

        # Use a leaky-relu activation so that gradients can flow even when
        # outputs are negative. Leaky relu has a non-zero slope when x < 0.
        # Otherwise success at learning is completely dependent on random
        # seed.
        hidden = tf.nn.leaky_relu(hidden, alpha=.2)
        shared = {"hidden_layer": hidden}

        with tf.variable_scope("logits"):
            logits = tf.layers.dense(
                hidden,
                logits_dimension,
                kernel_initializer=tf.glorot_uniform_initializer(seed=seed))

        summary.scalar("scalar", 3)

        return Subnetwork(
            last_layer=logits, logits=logits, complexity=3, shared=shared)
def __init__(self, encoder_name, categorial_features, continuous_features,
             FLAGS, dense_dims=(512,), use_input_bn=True,
             activation='leaky_relu', dropout=0., encode_id_num=0,
             encode_id_dim=32, is_training=True, ps_num=None):
    """Store the encoder configuration and create its embedding tables.

    :param encoder_name: suffix of the 'emb_table_' variable scope
    :param categorial_features: [n_categories, embedding_dim] pairs, one
        table is created per pair
    :param continuous_features: int
    :param FLAGS: stored on the instance
    :param dense_dims: stored on the instance
    :param use_input_bn: stored on the instance
    :param activation: 'leaky_relu', 'sigmoid' or 'tanh'; any other value
        leaves `self.act` as None
    :param dropout: stored on the instance
    :param encode_id_num: rows of the optional id table
    :param encode_id_dim: width of the optional id table
    :param is_training: stored on the instance
    :param ps_num: when not None, categorical tables are partitioned with
        at most this many partitions
    """
    self.is_training = is_training
    self.dense_dims = dense_dims
    self.encoder_name = encoder_name
    self.categorial_features = categorial_features
    self.continuous_features = continuous_features
    self.emb_table = {}
    self.use_input_bn = use_input_bn
    self.FLAGS = FLAGS
    self.dropout = dropout
    self.encode_id_num = encode_id_num
    self.encode_id_dim = encode_id_dim

    known_activations = {
        'leaky_relu': tf.nn.leaky_relu,
        'sigmoid': tf.nn.sigmoid,
        'tanh': tf.nn.tanh,
    }
    self.act = known_activations.get(activation)

    self.ps_num = ps_num
    emb_partitioner = None
    if ps_num is not None:
        emb_partitioner = tf.min_max_variable_partitioner(
            max_partitions=self.ps_num, min_slice_size=EMB_PT_SIZE)

    with tf.variable_scope('emb_table_' + self.encoder_name,
                           reuse=tf.AUTO_REUSE):
        has_categoricals = (self.categorial_features is not None
                            and len(self.categorial_features) > 0)
        if has_categoricals:
            for i, (n_categories, dim) in enumerate(self.categorial_features):
                # A None partitioner is the same as passing none at all.
                self.emb_table[i] = tf.get_variable(
                    "emb_lookup_table_{}".format(i),
                    [n_categories, dim],
                    partitioner=emb_partitioner)

        if self.encode_id_num > 0 and self.encode_id_dim > 0:
            # The id table is created without an explicit partitioner.
            self.emb_table['id'] = tf.get_variable(
                "emb_lookup_table_id",
                [self.encode_id_num, self.encode_id_dim])
def _partitioner(self, partitioner='min_max'):
    """Return the embedding partitioner selected by `partitioner`.

    Args:
        partitioner: 'min_max' or 'fixed'. Any value other than 'min_max'
            selects the fixed-size partitioner.

    Returns:
        A tensorflow Partitioner, or None when `conf.emb_max_partitions`
        is None.
    """
    max_parts = conf.emb_max_partitions
    if max_parts is None:
        return None
    if partitioner != 'min_max':
        return tf.fixed_size_partitioner(num_shards=max_parts)
    return tf.min_max_variable_partitioner(
        max_partitions=max_parts,
        min_slice_size=conf.emb_min_slice_size)
def __init__(self, out_dim, num_heads=1, concat=False, dropout=0.0,
             bias=False, name='', ps_num=0):
    """Create the attention vectors, linear layer and optional bias.

    Args:
        out_dim: per-head output width.
        num_heads: number of attention heads.
        concat: stored as `_concat`; also selects the bias length.
        dropout: stored as `_dropout`.
        bias: when truthy, a zero-initialised bias variable is created.
        name: prefix of the variable scope (`<name>/layer`) and of the
            dense layer name.
        ps_num: when truthy, used as `max_partitions` of the min-max
            partitioner installed on the variable scope.
    """
    self._out_dim = out_dim
    self._num_heads = num_heads
    self._concat = concat
    self._dropout = dropout
    self._bias = bias
    self._name = name

    self._partitioner = (
        tf.min_max_variable_partitioner(max_partitions=ps_num,
                                        min_slice_size=DNN_PT_SIZE)
        if ps_num else None)

    self._vars = {}
    attention_shape = [1, self._num_heads, self._out_dim]
    with tf.variable_scope(self._name + '/' + 'layer', reuse=tf.AUTO_REUSE,
                           partitioner=self._partitioner):
        self._vars['attn_src'] = tf.get_variable(
            name='attention_weights_src', shape=attention_shape)
        self._vars['attn_dst'] = tf.get_variable(
            name='attention_weights_dst', shape=attention_shape)
        self._linear = tf.keras.layers.Dense(
            units=self._num_heads * self._out_dim,
            use_bias=False,
            name=name + 'w')

        if self._bias:
            # NOTE(review): the bias is `out_dim` long when concat is set
            # and `num_heads * out_dim` long otherwise -- this looks
            # inverted relative to the usual multi-head convention;
            # confirm against the forward pass.
            if self._concat:
                bias_length = self._out_dim
            else:
                bias_length = self._num_heads * self._out_dim
            self._vars['bias'] = tf.Variable(
                tf.zeros([bias_length], dtype=tf.float32), name='bias')
def bloom_filter_emb(ids, hashes, zero_pad=True, mark_for_serving=True):
    """Embed `ids` through one or more hashed embedding tables.

    Each entry of `hashes` supplies 'table_name', 'hash_fn', 'bucket_size'
    and 'emb_dim'. The per-table embeddings are concatenated along axis 1
    and, unless there is exactly one table of width `FLAGS.dim`, projected
    to `FLAGS.dim` with a DNN variable.

    Args:
        ids: id tensor; flattened before hashing.
        hashes: sequence of hash-table descriptions (see above).
        zero_pad: when truthy, rows whose id equals 0 are replaced by zeros.
        mark_for_serving: forwarded to `get_emb_variable`.

    Returns:
        Tensor reshaped to `get_shape(ids) + [FLAGS.dim]`.
    """
    # NOTE(review): `self` is not a parameter of this function; the
    # partitioner below only works if `self` is visible from an enclosing
    # scope -- confirm.
    ids_flat = tf.reshape(ids, [-1])

    per_table = [
        get_emb_variable(
            name=spec['table_name'],
            ids=spec['hash_fn'](ids_flat),
            shape=(spec['bucket_size'], spec['emb_dim']),
            mark_for_serving=mark_for_serving,
            # important: use normal, not uniform
            initializer=get_unit_emb_initializer(FLAGS.dim))
        for spec in hashes
    ]
    e = tf.concat(per_table, axis=1)

    single_full_width = (len(hashes) == 1
                         and hashes[0]['emb_dim'] == FLAGS.dim)
    if single_full_width:
        print('bloom filter w/o fc: [%s]' % hashes[0]['table_name'])
    else:
        dnn_name = 'dnn__' + '__'.join(h['table_name'] for h in hashes)
        dnn_in_dim = sum(h['emb_dim'] for h in hashes)
        dnn = get_dnn_variable(
            name=dnn_name,
            shape=[dnn_in_dim, FLAGS.dim],
            # important: use normal, not uniform
            initializer=tf.glorot_normal_initializer(),
            partitioner=tf.min_max_variable_partitioner(
                max_partitions=self.ps_num,
                min_slice_size=FLAGS.dnn_pt_size))
        e = tf.matmul(e, dnn)

    if zero_pad:
        # Broadcast the "id == 0" mask across the embedding width.
        is_padding = tf.expand_dims(tf.equal(ids_flat, 0), -1)
        id_eq_zero = tf.tile(is_padding, [1, FLAGS.dim])
        e = tf.where(id_eq_zero, tf.zeros_like(e), e)

    return tf.reshape(e, get_shape(ids) + [FLAGS.dim])
def __init__(self, num, dim, init=None, str2hash=True, ps_hosts=None,
             use_edge=False, name=''):
    """Create the partitioned embedding and bias lookup tables.

    Args:
        num: number of rows of both tables.
        dim: width of the embedding table.
        init: initializer of the embedding table.
        str2hash: stored as `_str2hash`.
        ps_hosts: comma-separated host list; one partition per host at
            most, a single partition when falsy.
        use_edge: stored as `_use_edge`.
        name: prefix of the two variable scopes.
    """
    self._num, self._dim = num, dim
    self._str2hash = str2hash
    self._name = name
    self._use_edge = use_edge
    self._initializer = init

    ps_num = len(ps_hosts.split(",")) if ps_hosts else 1
    emb_partitioner = tf.min_max_variable_partitioner(
        max_partitions=ps_num, min_slice_size=EMB_PT_SIZE)

    embedding_scope = self._name + 'lookup_embedding'
    with tf.variable_scope(embedding_scope, reuse=tf.AUTO_REUSE,
                           partitioner=emb_partitioner):
        self._emb_table = tf.get_variable(
            "emb_lookup_table",
            [self._num, self._dim],
            initializer=self._initializer,
            partitioner=emb_partitioner)

    bias_scope = self._name + 'lookup_bias'
    with tf.variable_scope(bias_scope, reuse=tf.AUTO_REUSE,
                           partitioner=emb_partitioner):
        # Zero-initialised and excluded from training.
        self._bias_table = tf.get_variable(
            "bias_lookup_table",
            [self._num],
            partitioner=emb_partitioner,
            initializer=tf.zeros_initializer(),
            trainable=False)
def deep_model(numeric_input, category_input, vocabs, hidden1, hidden2, hidden3):
    """Build the deep part of the model and return its hidden activation.

    Every categorical column is embedded (width 8) and appended to
    `numeric_input` along axis 1; the result goes through one tanh layer of
    `hidden1` units. `hidden2` and `hidden3` are only referenced by the
    commented-out second and third layers.

    Reads `n_pss` and `task_index`, which are not defined in this function
    (presumably module-level settings -- verify).

    Args:
        numeric_input: tensor the embeddings are appended to along axis 1
            (assumed 2-D -- TODO confirm).
        category_input: tensor of category ids; it is transposed so that
            row `i` supplies the ids for `vocabs[i]`.
        vocabs: sequence of vocabulary sizes, one per categorical column.
        hidden1: number of units of the first (only active) layer.
        hidden2: unused by the active code.
        hidden3: unused by the active code.

    Returns:
        The tanh activation of the first layer.
    """
    # NOTE(review): statement nesting was reconstructed from a source whose
    # whitespace had been collapsed -- confirm the extent of the device
    # block inside the loop.
    embedding_output_cnt = 8
    transpose_category_input = tf.transpose(category_input)
    # Append each embedded category input to the numeric input.
    for i in range(0, len(vocabs)):
        embedding = tf.get_variable("deepem" + str(i), [vocabs[i], embedding_output_cnt],
                                    initializer=tf.contrib.layers.xavier_initializer(),
                                    #partitioner=tf.fixed_size_partitioner(n_pss))
                                    partitioner=tf.min_max_variable_partitioner(n_pss, 0, 2 << 10))
        # Pick one column from category input
        col = tf.nn.embedding_lookup(transpose_category_input, [i])[0]
        # Materialise the (possibly partitioned) table on this worker.
        with tf.device("/job:worker/task:" + str(task_index)):
            embedding = tf.identity(embedding)
        embedding_category = tf.nn.embedding_lookup(embedding, col)  # batch_size*embedding_output_cnt
        numeric_input = tf.concat([numeric_input, embedding_category], 1)
    # init
    W1 = tf.get_variable("W1", [numeric_input.shape[1], hidden1],
                         initializer=tf.contrib.layers.xavier_initializer())
    b1 = tf.get_variable("b1", [hidden1], initializer=tf.zeros_initializer())
    # W2 = tf.get_variable("W2", [hidden1, hidden2], initializer=tf.contrib.layers.xavier_initializer())
    # b2 = tf.get_variable("b2", [hidden2], initializer=tf.zeros_initializer())
    # W3 = tf.get_variable("W3", [hidden2, hidden3], initializer=tf.contrib.layers.xavier_initializer())
    # b3 = tf.get_variable("b3", [hidden3], initializer=tf.zeros_initializer())
    # forward
    Z1 = tf.add(tf.matmul(numeric_input, W1), b1)  # Z1 = np.dot(W1, X) + b1
    A1 = tf.nn.tanh(Z1)  # A1 = tanh(Z1)
    # Z2 = tf.add(tf.matmul(A1, W2), b2) # Z2 = np.dot(W2, a1) + b2
    # A2 = tf.nn.tanh(Z2) # A2 = relu(Z2)
    # Z3 = tf.add(tf.matmul(A2, W3), b3) # Z3 = np.dot(W3,Z2) + b3
    # A3 = tf.nn.tanh(Z3)
    return A1
def __init__(self, categorical_features_dict, total_feature_num, output_dim,
             use_input_bn=True, act=None, need_dense=True, ps_hosts=None,
             name=''):
    """Create one embedding table per categorical feature.

    The parsed feature config yields `(_, attr_name, max_num, emb_dim)`
    entries; each gets a `[max_num, emb_dim]` table stored in
    `self._emb_table[attr_name]`.

    Args:
        categorical_features_dict: input to `_parse_feature_config`.
        total_feature_num: stored as `_feature_num`.
        output_dim: stored as `_output_dim`.
        use_input_bn: stored as `_use_input_bn`.
        act: stored as `_act`.
        need_dense: stored as `_need_dense`.
        ps_hosts: comma-separated host list; when truthy the tables are
            partitioned with at most one partition per host.
        name: prefix of the variable scope.
    """
    self._output_dim = output_dim
    self._categorical_features = \
        self._parse_feature_config(categorical_features_dict)
    self._feature_num = total_feature_num
    self._use_input_bn = use_input_bn
    self._need_dense = need_dense
    self._name = name
    self._emb_table = {}
    self._act = act

    emb_partitioner = None
    self.ps_num = 1
    if ps_hosts:
        # Record the real host count on the instance; it was previously
        # kept in a local only, leaving `self.ps_num` stuck at 1.
        self.ps_num = len(ps_hosts.split(","))
        emb_partitioner = tf.min_max_variable_partitioner(
            max_partitions=self.ps_num, min_slice_size=EMB_PT_SIZE)

    with tf.variable_scope(self._name + 'feature_emb_table',
                           reuse=tf.AUTO_REUSE):
        if self._categorical_features:
            for _, attr_name, max_num, emb_dim in self._categorical_features:
                self._emb_table[attr_name] = tf.get_variable(
                    "emb_lookup_table_" + attr_name,
                    [max_num, emb_dim],
                    partitioner=emb_partitioner)
def full_connect(self, train_inputs, weights_shape, biases_shape,
                 activation_fn, scope_name):
    """Apply a fully connected layer: `activation_fn(inputs @ W + b)`.

    The created variables are recorded in `self.weights_map` under
    `scope_name` (weights) and `'<scope_name>_biases'` (biases).

    Args:
        train_inputs: left operand of the matmul.
        weights_shape: shape of the `weights` variable.
        biases_shape: shape of the `biases` variable.
        activation_fn: callable applied to the pre-activation, or None to
            return the pre-activation (a warning is printed).
        scope_name: key prefix used in `self.weights_map`.

    Returns:
        The layer output tensor.
    """
    # Partition the weights only when there is more than one parameter
    # server; a None partitioner is the same as passing none at all.
    partitioner = None
    if self.ps_num > 1:
        partitioner = tf.min_max_variable_partitioner(
            max_partitions=self.ps_num)

    weights = tf.get_variable(
        "weights",
        weights_shape,
        initializer=self.get_initializer(
            stddev=self.config.network_stddev),
        regularizer=tf.nn.l2_loss,
        partitioner=partitioner)
    self.weights_map[scope_name] = weights

    biases = tf.get_variable(
        "biases",
        biases_shape,
        initializer=tf.constant_initializer(
            value=self.config.biases_init_value))
    self.weights_map['%s_biases' % scope_name] = biases

    out = tf.nn.bias_add(tf.matmul(train_inputs, weights), biases)
    # Identity comparison: `!= None` would invoke a custom `__ne__`.
    if activation_fn is not None:
        out = activation_fn(out)
    else:
        print("warning! no activation fn !")
    return out
def main(_):
    """Build the problem graph, choose an optimizer and run the training loop.

    With `FLAGS.mode == 1` the loss is minimised with Adam; otherwise the
    Adam optimizer is wrapped in an `L2LOptimizer`, whose slot and
    optimizer variables are re-initialised every `FLAGS.reset_interval`
    steps. The accumulated loss is printed every 100 steps.
    """
    # NOTE(review): statement nesting was reconstructed from a source whose
    # whitespace had been collapsed -- confirm the extent of the
    # "problem" scope and that the two `if step % ...` checks are siblings.
    # Configuration.

    # Problem.
    with tf.variable_scope("problem",
                           partitioner=tf.min_max_variable_partitioner(
                               max_partitions=2, min_slice_size=10 << 10)):
        problem, net_config, net_assignments = util.get_config(FLAGS.problem)
        loss = problem()
    global_step = tf.Variable(0, dtype=tf.int64, trainable=False)

    # Optimizer setup.
    var_list = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='problem')
    print(var_list)
    #adam_opt = tf.train.GradientDescentOptimizer(FLAGS.learning_rate)
    adam_opt = tf.train.AdamOptimizer(FLAGS.learning_rate)
    # The plain Adam train op is always built; it is replaced below when
    # the learned optimizer is selected.
    opt = adam_opt.minimize(loss, global_step)
    if FLAGS.mode != 1:
        optimizer = l2l_optimizer.L2LOptimizer(
            internal_optimizer=adam_opt,
            loss_func=problem,
            opt_last=FLAGS.opt_last,
            preprocessor=LogAndSign(10),
            co_opt=FLAGS.co_opt,
            rnn_layer_cnt=FLAGS.layer,
            delta_ratio=FLAGS.delta_ratio,
            update_ratio=FLAGS.update_ratio,
            dynamic_unroll=FLAGS.dynamic_unroll)
        opt = optimizer.minimize(loss, global_step=global_step,
                                 unroll_len=FLAGS.unroll_len)
    if FLAGS.mode == 1:
        print('use adam opt')
    else:
        print('use l2l opt')

    # A no-op unless the learned optimizer is in use.
    slot_reset = tf.no_op()
    if FLAGS.mode != 1:
        slot_reset = tf.variables_initializer(optimizer._slot_vars +
                                              optimizer._opt_vars)
    init = tf.group(
        *[tf.global_variables_initializer(), tf.local_variables_initializer()])
    var_list = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
    print(var_list)
    #saver = tf.train.Saver(var_list = var_list)
    with ms.MonitoredSession() as sess:
        #with tf.Session() as sess:
        # Prevent accidental changes to the graph.
        tf.get_default_graph().finalize()
        sess.run(init)
        print('trainable variables')
        trainable_vars = tf.trainable_variables()
        for v in trainable_vars:
            print("parameter:", v.name, "device:", v.device, "shape:", v.get_shape())
        # best_evaluation, total_time and total_cost are assigned but not
        # read in the visible code.
        best_evaluation = float("inf")
        total_time = 0
        accum_loss = 0.0
        total_cost = 0
        for e in xrange(FLAGS.num_epochs):
            # Training.
            step, curr_loss, _ = sess.run([global_step, loss, opt])
            accum_loss += curr_loss
            # Report the mean loss over the last 100 steps, then restart
            # the accumulator.
            if step % 100 == 0:
                print('step:%d,loss:%f' % (step, accum_loss / 100))
                accum_loss = 0
            if step % FLAGS.reset_interval == 0:
                #print('reset')
                sess.run(slot_reset)
def __init__(self,
             FLAGS,
             global_step,
             batch_size1,
             batch_size2,
             num_node,
             model="train"):
    """Build the embedding table, placeholders, losses and training ops.

    Args:
        FLAGS: Flag container. This constructor reads ``alpha``, ``beta``,
            ``learning_rate``, ``learning_algo``, ``n_node_type``,
            ``edge_dim``, ``embedding_dim``, ``n_negs``,
            ``distributed_run`` and, when distributed, ``ps_hosts``.
        global_step: Passed to ``optimizer.minimize`` as the global step.
        batch_size1: Stored on ``self.batch_size1``.
        batch_size2: Not used; ``self.batch_size2`` is set to
            ``tf.size(self.ids)`` instead.
        num_node: Number of rows of the embedding table.
        model: When equal to ``"train"`` the "EP" loss, the regularizer,
            the optimizer and ``self.train_op`` are built as well;
            otherwise only the "LP" loss and ``self.output`` are built.
    """
    # NOTE(review): the nesting of the name scopes below was reconstructed
    # from data dependencies (everything that needs `node_W`/`l2_loss` is
    # under the train-only branch) - confirm against the intended layout.
    self.global_step = global_step
    self.batch_size1 = batch_size1
    self.FLAGS = FLAGS
    alpha = FLAGS.alpha
    beta = FLAGS.beta
    learning_rate = FLAGS.learning_rate
    learning_algo = FLAGS.learning_algo
    n_node_type = FLAGS.n_node_type
    edge_dim = FLAGS.edge_dim
    self.embedding_dim = FLAGS.embedding_dim

    init_range = np.sqrt(3.0 / (num_node + self.embedding_dim))
    partitioner = None
    if FLAGS.distributed_run:
        # One shard at most per parameter-server host.
        partitioner = tf.min_max_variable_partitioner(
            max_partitions=len(FLAGS.ps_hosts.split(",")))

    with tf.name_scope("parameters"):
        # An initializer object (not a pre-built Tensor) is used because
        # `tf.get_variable` is also given an explicit shape; the same
        # uniform(-init_range, init_range) distribution is drawn, and it
        # works for both the partitioned and the single-variable case.
        self.embedding_table = tf.get_variable(
            'hep', [num_node, self.embedding_dim],
            initializer=tf.random_uniform_initializer(
                minval=-init_range, maxval=init_range, dtype=tf.float32),
            partitioner=partitioner)

    self.ids = tf.placeholder(dtype=tf.int32, shape=[None], name="ids")
    self.negs = tf.placeholder(dtype=tf.int32,
                               shape=[None, FLAGS.n_negs],
                               name="Negs")
    self.id_types = tf.placeholder(dtype=tf.int32,
                                   shape=[None],
                                   name="id_types")
    # One placeholder per node type for each neighbor-related input.
    self.nbrs = [
        tf.placeholder(dtype=tf.int32, shape=[None],
                       name="nbrs_{}".format(i))
        for i in range(FLAGS.n_node_type)
    ]
    self.segments = [
        tf.placeholder(dtype=tf.int32, shape=[None],
                       name="segs_{}".format(i))
        for i in range(FLAGS.n_node_type)
    ]
    self.edge_types = [
        tf.placeholder(dtype=tf.int32, shape=[None],
                       name="types_{}".format(i))
        for i in range(FLAGS.n_node_type)
    ]
    self.edge_features = [
        tf.placeholder(dtype=tf.float32,
                       shape=[None, FLAGS.edge_dim],
                       name="features_{}".format(i))
        for i in range(FLAGS.n_node_type)
    ]
    self.weight_ab = [
        tf.placeholder(dtype=tf.float32, shape=[None],
                       name="fweight_{}".format(i))
        for i in range(FLAGS.n_node_type)
    ]
    self.uids = tf.placeholder(dtype=tf.int32, shape=[None], name="uids")
    self.sids = tf.placeholder(dtype=tf.int32, shape=[None], name="sids")
    self.labels = tf.placeholder(dtype=tf.int32,
                                 shape=[None],
                                 name="labels")
    self.batch_size2 = tf.size(self.ids)

    with tf.name_scope("LP"):
        self.W = tf.get_variable("w",
                                 [self.embedding_dim, self.embedding_dim],
                                 dtype=tf.float32)
        self.b = tf.get_variable("b", [1], dtype=tf.float32)
        id_left_lookup = tf.gather(self.embedding_table, self.uids)
        id_right_lookup = tf.gather(self.embedding_table, self.sids)
        self.l1_loss = self.L1(id_left_lookup, id_right_lookup,
                               self.labels, self.W)

    if model == "train":
        with tf.name_scope("EP"):
            self.edge_weight = tf.get_variable("edge_weight",
                                               [edge_dim, 1],
                                               dtype=tf.float32)
            self.edge_b = tf.get_variable("edge_b", [1], dtype=tf.float32)
            self.node_W = tf.get_variable(
                "node_W",
                [n_node_type * self.embedding_dim, self.embedding_dim],
                dtype=tf.float32)
            self.node_b = tf.get_variable("node_b", [self.embedding_dim],
                                          dtype=tf.float32)
            embs_lookup = tf.nn.embedding_lookup(self.embedding_table,
                                                 self.ids)
            negs_lookup = tf.nn.embedding_lookup(self.embedding_table,
                                                 self.negs)
            self.l2_loss = self.L2(embs_lookup, negs_lookup, self.nbrs,
                                   self.segments, self.edge_features,
                                   self.edge_types, FLAGS.n_node_type,
                                   self.weight_ab)

        with tf.name_scope("Regu"):
            # Sum of the mean squared entries of every dense parameter.
            omega = (tf.reduce_mean(tf.multiply(self.W, self.W)) +
                     tf.reduce_mean(tf.multiply(self.node_W, self.node_W)) +
                     tf.reduce_mean(tf.multiply(self.node_b, self.node_b)) +
                     tf.reduce_mean(
                         tf.multiply(self.edge_weight, self.edge_weight)) +
                     tf.reduce_mean(tf.multiply(self.edge_b, self.edge_b)) +
                     tf.reduce_mean(tf.multiply(self.b, self.b)))

        with tf.name_scope("loss"):
            if learning_algo == "adam":
                self.optimizer = tf.train.AdamOptimizer(
                    learning_rate=learning_rate)
            elif learning_algo == "sgd":
                self.optimizer = tf.train.GradientDescentOptimizer(
                    learning_rate=learning_rate)
            else:
                # Any other value falls back to Adagrad.
                self.optimizer = tf.train.AdagradOptimizer(
                    learning_rate=learning_rate)
            self.loss = self.l1_loss + alpha * self.l2_loss + beta * omega

        with tf.name_scope("opt"):
            self.opt_op = self.optimizer.minimize(
                self.loss, global_step=self.global_step)
            # NOTE(review): tf.summary.merge_all() yields None when no
            # summaries were registered - confirm callers handle that.
            self.merged = tf.summary.merge_all()
            self.train_op = [self.opt_op, self.merged]

    with tf.name_scope("outs"):
        # Bilinear score between the left and right embeddings, per row.
        self.output = tf.reduce_sum(
            tf.multiply(tf.matmul(id_left_lookup, self.W), id_right_lookup),
            1)

    with tf.name_scope("init"):
        self.init_saver()
def _wide_deep_combined_model_fn(
        features, labels, mode, head,
        model_type,
        with_cnn=False,
        cnn_optimizer='Adagrad',
        linear_feature_columns=None,
        linear_optimizer='Ftrl',
        dnn_feature_columns=None,
        dnn_optimizer='Adagrad',
        dnn_hidden_units=None,
        dnn_connected_mode=None,
        input_layer_partitioner=None,
        config=None):
    """Wide and Deep combined model_fn. (Dnn, Cnn, Linear)

    The logits of every tower that gets built (DNN, linear, CNN) are summed
    and handed to `head.create_estimator_spec`.

    Args:
        features: dict of `Tensor`. When `with_cnn` is set, the `'image'`
            entry is popped from this dict and fed to the CNN tower.
        labels: `Tensor` of shape [batch_size, 1] or [batch_size] labels of
            dtype `int32` or `int64` in the range `[0, n_classes)`.
        mode: Defines whether this is training, evaluation or prediction.
            See `ModeKeys`.
        head: A `Head` instance.
        model_type: one of `wide`, `deep`, `wide_deep`. `wide` skips the DNN
            tower, `deep` skips the linear tower.
        with_cnn: Bool, set True to combine image input features using cnn.
        cnn_optimizer: String, `Optimizer` object, or callable that defines
            the optimizer to use for training the CNN model.
        linear_feature_columns: An iterable containing all the feature
            columns used by the Linear model.
        linear_optimizer: String, `Optimizer` object, or callable that
            defines the optimizer to use for training the Linear model.
        dnn_feature_columns: An iterable containing all the feature columns
            used by the DNN model.
        dnn_optimizer: String, `Optimizer` object, or callable that defines
            the optimizer to use for training the DNN model.
        dnn_hidden_units: List of hidden units per DNN layer.
        dnn_connected_mode: List of connected mode.
        input_layer_partitioner: Partitioner for input layer. When falsy, a
            `min_max_variable_partitioner` with `min_slice_size` 64 << 20
            is used.
        config: `RunConfig` object to configure the runtime settings.

    Returns:
        The value returned by `head.create_estimator_spec`.

    Raises:
        ValueError: If `features` is not a dict, if `with_cnn` is set but
            `features` has no `'image'` entry, or if no tower is built
            (nothing to combine into logits).
    """
    if not isinstance(features, dict):
        raise ValueError('features should be a dictionary of `Tensor`s. '
                         'Given type: {}'.format(type(features)))
    if with_cnn:
        try:
            # Separate the image feature from the other input_fn features.
            cnn_features = features.pop('image')
        except KeyError:
            raise ValueError('No input image features, must provide image features if use cnn.')

    num_ps_replicas = config.num_ps_replicas if config else 0
    input_layer_partitioner = input_layer_partitioner or (
        tf.min_max_variable_partitioner(max_partitions=num_ps_replicas,
                                        min_slice_size=64 << 20))

    # Learning-rate decay. The schedules follow the real training global
    # step; a private `tf.Variable(0)` would never be incremented and the
    # decay would never advance.
    global_step = tf.train.get_or_create_global_step()
    linear_learning_rate = tf.train.exponential_decay(
        _linear_init_learning_rate, global_step=global_step,
        decay_steps=decay_steps, decay_rate=_linear_decay_rate,
        staircase=False)
    dnn_learning_rate = tf.train.exponential_decay(
        _dnn_init_learning_rate, global_step=global_step,
        decay_steps=decay_steps, decay_rate=_dnn_decay_rate,
        staircase=False)
    cnn_learning_rate = tf.train.exponential_decay(
        _cnn_init_learning_rate, global_step=global_step,
        decay_steps=decay_steps, decay_rate=_cnn_decay_rate,
        staircase=False)

    # Build DNN Logits. The deep tower is skipped only for the pure `wide`
    # model (mirrors the `deep` check of the linear tower below).
    dnn_parent_scope = 'dnn'
    if model_type == 'wide' or not dnn_feature_columns:
        dnn_logits = None
    else:
        dnn_optimizer = get_optimizer_instance(
            dnn_optimizer, learning_rate=dnn_learning_rate)
        if model_type == 'wide_deep':
            check_no_sync_replicas_optimizer(dnn_optimizer)
        dnn_partitioner = tf.min_max_variable_partitioner(
            max_partitions=num_ps_replicas)
        with tf.variable_scope(
                dnn_parent_scope,
                values=tuple(six.itervalues(features)),
                partitioner=dnn_partitioner):
            dnn_logit_fn = multidnn_logit_fn_builder(
                units=head.logits_dimension,
                hidden_units_list=dnn_hidden_units,
                connected_mode_list=dnn_connected_mode,
                feature_columns=dnn_feature_columns,
                input_layer_partitioner=input_layer_partitioner)
            dnn_logits = dnn_logit_fn(features=features, mode=mode)

    # Build Linear Logits.
    linear_parent_scope = 'linear'
    if model_type == 'deep' or not linear_feature_columns:
        linear_logits = None
    else:
        linear_optimizer = get_optimizer_instance(
            linear_optimizer, learning_rate=linear_learning_rate)
        check_no_sync_replicas_optimizer(linear_optimizer)
        with tf.variable_scope(
                linear_parent_scope,
                values=tuple(six.itervalues(features)),
                partitioner=input_layer_partitioner) as scope:
            logit_fn = linear_logit_fn_builder(
                units=head.logits_dimension,
                feature_columns=linear_feature_columns)
            linear_logits = logit_fn(features=features)
            add_layer_summary(linear_logits, scope.name)

    # Build CNN Logits.
    cnn_parent_scope = 'cnn'
    if not with_cnn:
        cnn_logits = None
    else:
        cnn_optimizer = get_optimizer_instance(
            cnn_optimizer, learning_rate=cnn_learning_rate)
        with tf.variable_scope(
                cnn_parent_scope,
                values=(cnn_features,),
                partitioner=input_layer_partitioner) as scope:
            img_vec = Vgg16().build(cnn_features)
            cnn_logits = tf.layers.dense(
                img_vec,
                units=head.logits_dimension,
                kernel_initializer=tf.glorot_uniform_initializer(),
                name=scope)
            add_layer_summary(cnn_logits, scope.name)

    # Combine logits and build full model.
    towers = (
        (dnn_logits, dnn_optimizer, dnn_parent_scope),
        (linear_logits, linear_optimizer, linear_parent_scope),
        (cnn_logits, cnn_optimizer, cnn_parent_scope),
    )
    logits_combine = [
        tower_logits for tower_logits, _, _ in towers
        if tower_logits is not None
    ]
    if not logits_combine:
        raise ValueError(
            'No logits were built: provide feature columns matching '
            '`model_type` and/or enable `with_cnn`.')
    logits = tf.add_n(logits_combine)

    def _train_op_fn(loss):
        """Returns the op to optimize the loss."""
        train_ops = []
        step = tf.train.get_global_step()
        # BN: when training, moving_mean and moving_variance need to be
        # updated. The update ops live in tf.GraphKeys.UPDATE_OPS and are
        # added as a dependency of every tower's train op.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            for tower_logits, tower_optimizer, tower_scope in towers:
                if tower_logits is None:
                    continue
                # No `global_step` here: each `minimize` call given one
                # increments it, and it is advanced exactly once below.
                train_ops.append(
                    tower_optimizer.minimize(
                        loss,
                        var_list=tf.get_collection(
                            tf.GraphKeys.TRAINABLE_VARIABLES,
                            scope=tower_scope)))
        # Group the per-tower updates; the grouped op has no output.
        train_op = tf.group(*train_ops)
        with tf.control_dependencies([train_op]):
            with tf.colocate_with(step):
                return tf.assign_add(step, 1)

    return head.create_estimator_spec(
        features=features,
        mode=mode,
        labels=labels,
        train_op_fn=_train_op_fn,
        logits=logits)
def _dnn_model_fn(features,
                  labels,
                  mode,
                  head,
                  optimizer='Adagrad',
                  input_layer_partitioner=None,
                  config=None):
    """Deep Neural Net model_fn summing the logits of several DNN towers.

    One tower is built with `_dnn_logit_fn` for every entry of the
    module-level `model_collections`; the tower logits are added together
    and passed to `head.create_estimator_spec`.

    Args:
        features: dict of `Tensor`.
        labels: `Tensor` of shape [batch_size, 1] or [batch_size] labels of
            dtype `int32` or `int64` in the range `[0, n_classes)`.
        mode: Defines whether this is training, evaluation or prediction.
            See `ModeKeys`.
        head: A `head_lib._Head` instance.
        optimizer: String, `tf.Optimizer` object, or callable that creates
            the optimizer to use for training. It is resolved through
            `_get_optimizer_instance` with a learning rate of 0.05.
        input_layer_partitioner: Partitioner for input layer. Defaults to
            `min_max_variable_partitioner` with `min_slice_size` 64 << 20.
        config: `RunConfig` object to configure the runtime settings.

    Returns:
        The value returned by `head.create_estimator_spec`.

    Raises:
        ValueError: If features has the wrong type.
    """
    if not isinstance(features, dict):
        raise ValueError(
            'features should be a dictionary of `Tensor`s. '
            'Given type: {}'.format(type(features)))

    optimizer = _get_optimizer_instance(optimizer, learning_rate=0.05)

    num_ps_replicas = 0
    if config:
        num_ps_replicas = config.num_ps_replicas
    scope_partitioner = tf.min_max_variable_partitioner(
        max_partitions=num_ps_replicas)

    with tf.variable_scope('dnn',
                           values=tuple(features.values()),
                           partitioner=scope_partitioner):
        if not input_layer_partitioner:
            input_layer_partitioner = tf.min_max_variable_partitioner(
                max_partitions=num_ps_replicas,
                min_slice_size=64 << 20)

        # Each tower emits logits of shape (batch_size, num_classes); towers
        # are numbered starting from 1.
        tower_logits = [
            _dnn_logit_fn(features, mode, tower_no, head.logits_dimension,
                          spec.hidden_units, spec.connected_layers,
                          feature_columns, input_layer_partitioner)
            for tower_no, spec in enumerate(model_collections, start=1)
        ]
        # Adding the logit layers is the same as concatenating the layers
        # that precede them.
        logits = tf.add_n(tower_logits)

        def _train_op_fn(loss):
            """Returns the op to optimize the loss."""
            step = tf.train.get_global_step()
            return optimizer.minimize(loss, global_step=step)

        return head.create_estimator_spec(features=features,
                                          mode=mode,
                                          labels=labels,
                                          train_op_fn=_train_op_fn,
                                          logits=logits)