def build_prior_state(self):
    """Create the parameters of the prior memory distribution.

    Inside the ``prior_state`` name scope this adds:

    * ``prior_memory_mean`` -- a ``[memory_size, code_size]`` weight
      initialised with ``truncated_normal(mean=0.0, stddev=1.0)``.
    * ``prior_memory_covariance`` -- a diagonal matrix built with
      ``tf.matrix_diag`` from a single scalar weight broadcast over
      ``memory_size`` entries, plus ``backend.epsilon()``.

    Both attributes are then appended to ``self._non_trainable_weights``.
    """
    with tf.name_scope("prior_state"):
        # ---- Prior memory mean ----
        # NOTE(review): no `trainable=False` is passed here even though the
        # mean is registered as non-trainable below -- confirm the intent.
        self.prior_memory_mean = self.add_weight(
            name="prior_memory_mean",
            shape=[self.memory_size, self.code_size],
            initializer=truncated_normal(mean=0.0, stddev=1.0),
        )

        # ---- Prior memory covariance ----
        scale = self.add_weight(
            name="prior_memory_log_variance_scale",
            shape=[],
            initializer=zeros,
        )
        # NOTE(review): the weight is named "log variance" but is applied as
        # a plain multiplier (no exp); presumably `zeros` starts it at 0, in
        # which case the diagonal starts at epsilon -- confirm this is wanted.
        diagonal = scale * tf.ones([self.memory_size]) + backend.epsilon()
        covariance = tf.matrix_diag(diagonal, name="prior_memory_covariance")
        self.prior_memory_covariance = covariance
        # The result of tf.matrix_diag is a plain tensor and carries no
        # `trainable` attribute of its own; code that inspects model
        # parameters may expect one, so it is attached explicitly.
        covariance.trainable = False

    self._non_trainable_weights += [
        self.prior_memory_covariance,
        self.prior_memory_mean,
    ]
def build(self, input_shape):
    """Create the layer's ``W`` weight and finish building via the parent.

    ``W`` is a float32 weight of shape ``self.shape`` initialised with
    ``truncated_normal(0, 0.1)``.

    Args:
        input_shape: Forwarded unchanged to the parent class's ``build``.
    """
    weight_init = truncated_normal(0, 0.1)
    self.W = self.add_weight(
        name="W",
        shape=self.shape,
        dtype=tf.float32,
        initializer=weight_init,
    )
    # Be sure to call the parent build somewhere!
    super(DotProductAttentionLayer, self).build(input_shape)
def multi_head_attention(self, inputs, new_embed_size):
    """Run multi-head dot-product self-attention over ``inputs``.

    The inputs are projected (bias-free dense layers) into queries, keys and
    values of width ``num_heads * new_embed_size``, split into heads along
    the last axis, combined through softmax attention and merged back.  When
    ``self.use_residual`` is set, a fourth projection of the inputs is added
    to the merged result.  A ReLU is applied to the final tensor.

    Shape comments use K = ``new_embed_size`` and H = ``self.num_heads``;
    B and F are presumably batch size and number of fields -- TODO confirm.

    Args:
        inputs: Rank-3 tensor (it is split along ``axis=2``).
        new_embed_size: Per-head embedding width.

    Returns:
        The attended tensor, B * F * (K*H) per the shape comments.
    """
    multi_embed_size = self.num_heads * new_embed_size

    def project():
        # Linear projection of `inputs` to the multi-head width, no bias.
        return tf.layers.dense(
            inputs=inputs,
            units=multi_embed_size,
            activation=None,
            kernel_initializer=truncated_normal(0.0, 0.01),
            use_bias=False,
        )

    def to_heads(tensor):
        # B * F * (K*H)  ->  H * B * F * K
        return tf.stack(tf.split(tensor, self.num_heads, axis=2))

    # B * F * (K*H)
    queries = project()
    keys = project()
    values = project()
    if self.use_residual:
        residual = project()

    # H * B * F * K
    queries = to_heads(queries)
    keys = to_heads(keys)
    values = to_heads(values)

    # H * B * F * F
    # Note: the scores are not divided by sqrt(new_embed_size) here.
    weights = tf.nn.softmax(queries @ tf.transpose(keys, [0, 1, 3, 2]))

    # H * B * F * K
    attended = weights @ values

    # 1 * B * F * (K*H), then drop the leading axis -> B * F * (K*H)
    merged = tf.concat(tf.split(attended, self.num_heads, axis=0), axis=-1)
    outputs = tf.squeeze(merged, axis=0)

    if self.use_residual:
        outputs = outputs + residual
    return tf.nn.relu(outputs)
def _build_user_item(self):
    """Add user/item id placeholders and their embedding lookups.

    Creates two int32 placeholders of shape ``[None]`` (``user_indices`` and
    ``item_indices``), two embedding tables (``user_feat`` of shape
    ``[n_users, embed_size]`` and ``item_feat`` of shape
    ``[n_items, embed_size]``), looks the ids up, inserts a length-1 axis at
    position 1 and appends both results to ``self.concat_embed``.
    """
    self.user_indices = tf.placeholder(tf.int32, shape=[None])
    self.item_indices = tf.placeholder(tf.int32, shape=[None])

    user_table = tf.get_variable(
        name="user_feat",
        shape=[self.n_users, self.embed_size],
        initializer=truncated_normal(0.0, 0.01),
        regularizer=self.reg,
    )
    item_table = tf.get_variable(
        name="item_feat",
        shape=[self.n_items, self.embed_size],
        initializer=truncated_normal(0.0, 0.01),
        regularizer=self.reg,
    )

    # [None] ids -> [None, embed_size] rows -> [None, 1, embed_size]
    user_rows = tf.nn.embedding_lookup(user_table, self.user_indices)
    user_embed = tf.expand_dims(user_rows, axis=1)
    item_rows = tf.nn.embedding_lookup(item_table, self.item_indices)
    item_embed = tf.expand_dims(item_rows, axis=1)

    self.concat_embed.extend([user_embed, item_embed])
def build(self, input_shape):
    """Create the song embedding and bias weights, then build the parent.

    * ``song_embedding``: float32 weight of shape
      ``[target_song_size, target_emb_size]`` initialised with
      ``truncated_normal(0, 0.1)``.
    * ``zero_bias``: float32 weight of shape ``[target_song_size]`` created
      with the ``zeros`` initializer and ``trainable=False``.

    Args:
        input_shape: Forwarded unchanged to the parent class's ``build``.
    """
    embedding_shape = [self.target_song_size, self.target_emb_size]
    self.song_embedding = self.add_weight(
        name="song_embedding",
        shape=embedding_shape,
        dtype=tf.float32,
        initializer=truncated_normal(0, 0.1),
    )

    # Registered as non-trainable, so it is excluded from trainable weights.
    self.zero_bias = self.add_weight(
        name="bias",
        shape=[self.target_song_size],
        dtype=tf.float32,
        initializer=zeros,
        trainable=False,
    )

    super(SampledSoftmaxLayer, self).build(input_shape)
def _build_sparse(self):
    """Add the sparse-feature index placeholder and its embedding lookup.

    Creates an int32 placeholder ``sparse_indices`` of shape
    ``[None, sparse_field_size]`` and a ``sparse_feat`` table of shape
    ``[sparse_feature_size, embed_size]``; the looked-up embeddings are
    appended to ``self.concat_embed``.
    """
    index_shape = [None, self.sparse_field_size]
    self.sparse_indices = tf.placeholder(tf.int32, shape=index_shape)

    embedding_table = tf.get_variable(
        name="sparse_feat",
        shape=[self.sparse_feature_size, self.embed_size],
        initializer=truncated_normal(0.0, 0.01),
        regularizer=self.reg,
    )

    # One embedding row per index: [None, sparse_field_size, embed_size]
    self.concat_embed.append(
        tf.nn.embedding_lookup(embedding_table, self.sparse_indices)
    )
def _build_dense(self):
    """Add the dense-value placeholder and its value-scaled embeddings.

    Creates a float32 placeholder ``dense_values`` of shape
    ``[None, dense_field_size]`` and a ``dense_feat`` variable of shape
    ``[dense_field_size, embed_size]``.  The variable is tiled across the
    batch and multiplied element-wise by the dense values; the product is
    appended to ``self.concat_embed``.

    Shape comments use B for the dynamic batch size, F_dense for
    ``dense_field_size`` and K for ``embed_size``.
    """
    self.dense_values = tf.placeholder(
        tf.float32, shape=[None, self.dense_field_size]
    )
    # B * F_dense * 1, so each value scales one whole embedding row.
    values_column = tf.reshape(
        self.dense_values, [-1, self.dense_field_size, 1]
    )

    dense_feat = tf.get_variable(
        name="dense_feat",
        shape=[self.dense_field_size, self.embed_size],
        initializer=truncated_normal(0.0, 0.01),
        regularizer=self.reg,
    )

    batch_size = tf.shape(self.dense_values)[0]
    # 1 * F_dense * K  ->  B * F_dense * K
    tiled_feat = tf.tile(
        tf.expand_dims(dense_feat, axis=0), [batch_size, 1, 1]
    )
    self.concat_embed.append(tf.multiply(tiled_feat, values_column))
def build_prior_state(self):
    """Create the parameters of the prior memory distribution.

    Inside the ``prior_state`` name scope this adds:

    * ``prior_memory_mean`` -- a ``[memory_size, code_size]`` weight
      initialised with ``truncated_normal(mean=0.0, stddev=1.0)``.
    * ``prior_memory_covariance`` -- a diagonal matrix built with
      ``tf.matrix_diag`` from a single scalar weight broadcast over
      ``memory_size`` entries, plus ``backend.epsilon()``.

    Both attributes are then appended to ``self._non_trainable_weights``.
    The covariance is a plain tensor rather than a variable, so it is given
    an explicit ``trainable = False`` attribute before being registered.
    """
    with tf.name_scope("prior_state"):
        # region Prior memory mean
        # NOTE(review): no `trainable=False` is passed here even though the
        # mean is registered as non-trainable below -- confirm the intent.
        mean_initializer = truncated_normal(mean=0.0, stddev=1.0)
        self.prior_memory_mean = self.add_weight(
            name="prior_memory_mean",
            shape=[self.memory_size, self.code_size],
            initializer=mean_initializer,
        )
        # endregion

        # region Prior memory covariance
        log_variance_scale = self.add_weight(
            name="prior_memory_log_variance_scale",
            shape=[],
            initializer=zeros,
        )
        # NOTE(review): the weight is named "log variance" but is applied as
        # a plain multiplier (no exp); presumably `zeros` starts it at 0, in
        # which case the diagonal starts at epsilon -- confirm this is wanted.
        variance = (
            log_variance_scale * tf.ones([self.memory_size])
            + backend.epsilon()
        )
        self.prior_memory_covariance = tf.matrix_diag(
            variance, name="prior_memory_covariance"
        )
        # tf.matrix_diag returns a tensor, which has no `trainable`
        # attribute.  Entries of `_non_trainable_weights` may be inspected
        # for that attribute by code that walks the model parameters, so
        # set it explicitly to avoid an AttributeError there.
        self.prior_memory_covariance.trainable = False
        # endregion

    self._non_trainable_weights += [
        self.prior_memory_covariance,
        self.prior_memory_mean,
    ]