def call(self, inputs):
    """Pool a graph into a coarsened graph via a learned soft cluster assignment.

    Expects ``inputs = [X, A]`` where X are node features and A is the
    adjacency matrix (dense or SparseTensor). Registers two auxiliary
    losses (link-prediction and assignment entropy) via ``self.add_loss``
    and returns ``[X_pooled, A_pooled]``, plus the assignment matrix S
    when ``self.return_mask`` is set.
    """
    X, A = inputs
    # N is taken from A's last axis, so it is the number of nodes in
    # single/mixed mode (A is shared across the batch in mixed mode).
    N = K.shape(A)[-1]
    # Check if the layer is operating in mixed or batch mode
    mode = ops.autodetect_mode(X, A)
    # In mixed/batch mode the per-graph losses are averaged to scalars.
    self.reduce_loss = mode in (modes.MIXED, modes.BATCH)

    # Get normalized adjacency: add self-loops (A + I) then normalize.
    if K.is_sparse(A):
        I_ = tf.sparse.eye(N, dtype=A.dtype)
        A_ = tf.sparse.add(A, I_)
    else:
        I_ = tf.eye(N, dtype=A.dtype)
        A_ = A + I_
    fltr = ops.normalize_A(A_)

    # Node embeddings: one graph-conv step with the embedding kernel.
    Z = K.dot(X, self.kernel_emb)
    Z = ops.modal_dot(fltr, Z)
    if self.activation is not None:
        Z = self.activation(Z)

    # Compute cluster assignment matrix (one graph-conv step + softmax).
    S = K.dot(X, self.kernel_pool)
    S = ops.modal_dot(fltr, S)
    S = activations.softmax(S, axis=-1)  # softmax applied row-wise

    # Link prediction loss: || A - S S^T ||_F (per graph).
    S_gram = ops.modal_dot(S, S, transpose_b=True)
    if mode == modes.MIXED:
        # Densify and broadcast the shared A against the batched S_gram.
        # NOTE: this rebinds A, so the sparse branch below is skipped in
        # mixed mode and A_pooled below uses the densified A.
        A = tf.sparse.to_dense(A)[None, ...]
    if K.is_sparse(A):
        LP_loss = tf.sparse.add(A, -S_gram)  # A/tf.norm(A) - S_gram/tf.norm(S_gram)
    else:
        LP_loss = A - S_gram
    LP_loss = tf.norm(LP_loss, axis=(-1, -2))
    if self.reduce_loss:
        LP_loss = K.mean(LP_loss)
    self.add_loss(LP_loss)

    # Entropy loss: mean row-wise entropy of S, pushes assignments to be
    # confident (epsilon guards log(0)).
    entr = tf.negative(
        tf.reduce_sum(tf.multiply(S, K.log(S + K.epsilon())), axis=-1)
    )
    entr_loss = K.mean(entr, axis=-1)
    if self.reduce_loss:
        entr_loss = K.mean(entr_loss)
    self.add_loss(entr_loss)

    # Pooling: X' = S^T Z, A' = S^T A S.
    X_pooled = ops.modal_dot(S, Z, transpose_a=True)
    A_pooled = ops.matmul_at_b_a(S, A)
    output = [X_pooled, A_pooled]

    if self.return_mask:
        output.append(S)

    return output
def call(self, inputs): X = inputs[0] # (batch_size, N, F) A = inputs[1] # (batch_size, N, N) E = inputs[2] # (n_edges, S) or (batch_size, N, N, S) mode = ops.autodetect_mode(A, X) if mode == modes.SINGLE: return self._call_single(inputs) # Parameters N = K.shape(X)[-2] F = K.int_shape(X)[-1] F_ = self.channels # Normalize adjacency matrix A = ops.normalize_A(A) # Filter network kernel_network = E for l in self.kernel_network_layers: kernel_network = l(kernel_network) # Convolution target_shape = (-1, N, N, F_, F) if mode == modes.BATCH else (N, N, F_, F) kernel = K.reshape(kernel_network, target_shape) output = kernel * A[..., None, None] output = tf.einsum('abicf,aif->abc', output, X) if self.use_bias: output = K.bias_add(output, self.bias) if self.activation is not None: output = self.activation(output) return output
def call(self, inputs):
    """Run the attention layer, dispatching to the sparse or dense path.

    ``inputs = [x, a]`` (node features, adjacency). Heads are either
    concatenated along the channel axis or averaged, depending on
    ``self.concat_heads``. Returns the output (and the attention
    coefficients when ``self.return_attn_coef`` is set).
    """
    features, adj = inputs

    # The dedicated sparse implementation only supports single mode.
    sparse_single = (
        ops.autodetect_mode(features, adj) == modes.SINGLE and K.is_sparse(adj)
    )
    if sparse_single:
        result, attn_coef = self._call_single(features, adj)
    else:
        if K.is_sparse(adj):
            adj = tf.sparse.to_dense(adj)
        result, attn_coef = self._call_dense(features, adj)

    if self.concat_heads:
        # Fold the heads axis into the channels axis (dynamic shape).
        merged_channels = [self.attn_heads * self.channels]
        new_shape = tf.concat((tf.shape(result)[:-2], merged_channels), axis=0)
        result = tf.reshape(result, new_shape)
    else:
        # Average the per-head outputs instead of concatenating them.
        result = tf.reduce_mean(result, axis=-2)

    if self.use_bias:
        result = result + self.bias

    result = self.activation(result)

    return (result, attn_coef) if self.return_attn_coef else result
def call(self, inputs):
    """Run the attention layer on ``inputs = [X, A]``.

    Sparse single-mode input goes through ``self._call_single``;
    everything else through ``self._call_dense``. Heads are merged into
    channels or averaged according to ``self.concat_heads``.
    """
    node_feats, adj = inputs[0], inputs[1]

    is_sparse_single = (
        ops.autodetect_mode(adj, node_feats) == modes.SINGLE
        and K.is_sparse(adj)
    )
    if is_sparse_single:
        out, attn_coef = self._call_single(node_feats, adj)
    else:
        out, attn_coef = self._call_dense(node_feats, adj)

    if self.concat_heads:
        # Merge heads into the channel axis using the static shape;
        # unknown dimensions are replaced by -1 for tf.reshape.
        static_shape = out.shape[:-2] + [self.attn_heads * self.channels]
        reshape_to = [-1 if dim is None else dim for dim in static_shape]
        out = tf.reshape(out, reshape_to)
    else:
        # Average the head outputs instead of concatenating.
        out = tf.reduce_mean(out, axis=-2)

    if self.use_bias:
        out = out + self.bias

    out = self.activation(out)

    if self.return_attn_coef:
        return out, attn_coef
    return out
def call(self, inputs):
    """Edge-conditioned convolution with optional root (self) weight.

    Expects ``inputs = [x, a, e]`` (node features, adjacency, edge
    features). Single mode is delegated to ``self._call_single``; this
    body handles the dense batch case with an einsum contraction.
    """
    x, a, e = inputs

    mode = ops.autodetect_mode(a, x)
    if mode == modes.SINGLE:
        return self._call_single(inputs)

    # Parameters
    N = K.shape(x)[-2]       # dynamic node count
    F = K.int_shape(x)[-1]   # static input feature size (needed for reshape)
    F_ = self.channels

    # Filter network: MLP mapping edge features to flattened F x F_ kernels.
    kernel_network = e
    for layer in self.kernel_network_layers:
        kernel_network = layer(kernel_network)

    # Convolution
    target_shape = (-1, N, N, F_, F) if mode == modes.BATCH else (N, N, F_, F)
    kernel = K.reshape(kernel_network, target_shape)
    # Mask per-edge kernels by adjacency, then contract neighbors:
    # output[a, b, c] = sum_{i, f} kernel[a, b, i, c, f] * x[a, i, f]
    output = kernel * a[..., None, None]
    # NOTE(review): the subscripts assume the 5-D BATCH kernel and 3-D x;
    # the 4-D MIXED target_shape would not match 'abicf' — confirm
    # whether mixed mode is meant to reach this path.
    output = tf.einsum('abicf,aif->abc', output, x)

    if self.root:
        # Separate linear transform of each node's own features.
        output += ops.dot(x, self.root_kernel)
    if self.use_bias:
        output = K.bias_add(output, self.bias)
    if self.activation is not None:
        output = self.activation(output)

    return output
def call(self, inputs, mask=None):
    """Edge-conditioned convolution over ``inputs = [x, a, e]``.

    Batch mode uses a dense einsum; single/mixed mode uses sparse
    gather/scatter message passing (densifying ``a`` is coerced to a
    SparseTensor with a warning). ``mask`` (node mask) zeroes the output
    rows of masked-out nodes.

    Fix: the implicitly-concatenated warning message previously rendered
    as "...SparseTensor.This can be an expensive operation. " (missing
    space between sentences, trailing space) — the message text is
    corrected.
    """
    x, a, e = inputs

    # Parameters
    N = tf.shape(x)[-2]   # dynamic node count
    F = tf.shape(x)[-1]   # dynamic input feature size
    F_ = self.channels

    # Filter network: MLP mapping edge features to flattened kernels.
    kernel_network = e
    for layer in self.kernel_network_layers:
        kernel_network = layer(kernel_network)

    # Convolution
    mode = ops.autodetect_mode(x, a)
    if mode == modes.BATCH:
        # Dense path: per-edge kernels masked by a, contracted with x.
        kernel = K.reshape(kernel_network, (-1, N, N, F_, F))
        output = kernel * a[..., None, None]
        output = tf.einsum("abcde,ace->abd", output, x)
    else:
        # Enforce sparse representation
        if not K.is_sparse(a):
            warnings.warn(
                "Casting dense adjacency matrix to SparseTensor. "
                "This can be an expensive operation."
            )
            a = tf.sparse.from_dense(a)

        target_shape = (-1, F, F_)
        if mode == modes.MIXED:
            # Keep the batch axis when x is batched but a is shared.
            target_shape = (tf.shape(x)[0],) + target_shape
        kernel = tf.reshape(kernel_network, target_shape)
        # Edge (j -> i): gather source-node features, transform with the
        # per-edge kernel, and sum messages at each target node.
        index_i = a.indices[:, 1]
        index_j = a.indices[:, 0]
        messages = tf.gather(x, index_j, axis=-2)
        messages = tf.einsum("...ab,...abc->...ac", messages, kernel)
        output = ops.scatter_sum(messages, index_i, N)

    if self.root:
        # Separate linear transform of each node's own features.
        output += K.dot(x, self.root_kernel)
    if self.use_bias:
        output = K.bias_add(output, self.bias)
    if mask is not None:
        output *= mask[0]
    output = self.activation(output)

    return output
def call(self, inputs):
    """Legacy DiffPool-style pooling with learned cluster assignments.

    Expects ``inputs = [X, A]`` or ``[X, A, I]`` (I = graph indices).
    Registers a link-prediction loss and an entropy loss via
    ``self.add_loss`` and returns ``[X_pooled, A_pooled]`` (plus pooled
    indices and/or the assignment mask, depending on inputs/options).
    """
    # Note that I is useless, because the layer cannot be used in graph
    # batch mode.
    if len(inputs) == 3:
        X, A, I = inputs
    else:
        X, A = inputs
        I = None
    N = K.shape(A)[-1]
    # Check if the layer is operating in batch mode (X and A have rank 3)
    mode = ops.autodetect_mode(A, X)
    # 'M' = mixed, 'B' = batch (legacy private mode table).
    self.reduce_loss = mode in (ops._modes['M'], ops._modes['B'])

    # Get normalized adjacency: add self-loops (A + I) then normalize.
    if K.is_sparse(A):
        I_ = tf.sparse.eye(N, dtype=A.dtype)
        A_ = tf.sparse.add(A, I_)
    else:
        I_ = tf.eye(N, dtype=A.dtype)
        A_ = A + I_
    fltr = ops.normalize_A(A_)

    # Node embeddings: one graph-conv step with the embedding kernel.
    Z = K.dot(X, self.kernel_emb)
    Z = ops.filter_dot(fltr, Z)
    if self.activation is not None:
        Z = self.activation(Z)

    # Compute cluster assignment matrix (one graph-conv step + softmax).
    S = K.dot(X, self.kernel_pool)
    S = ops.filter_dot(fltr, S)
    S = activations.softmax(S, axis=-1)  # softmax applied row-wise

    # Link prediction loss: || A - S S^T ||_F (per graph).
    S_gram = ops.matmul_A_BT(S, S)
    if K.is_sparse(A):
        LP_loss = tf.sparse.add(A, -S_gram)  # A/tf.norm(A) - S_gram/tf.norm(S_gram)
    else:
        LP_loss = A - S_gram
    LP_loss = tf.norm(LP_loss, axis=(-1, -2))
    if self.reduce_loss:
        LP_loss = K.mean(LP_loss)
    self.add_loss(LP_loss)

    # Entropy loss: mean row-wise entropy of S, pushes assignments to be
    # confident (epsilon guards log(0)).
    entr = tf.negative(tf.reduce_sum(tf.multiply(S, K.log(S + K.epsilon())), axis=-1))
    entr_loss = K.mean(entr, axis=-1)
    if self.reduce_loss:
        entr_loss = K.mean(entr_loss)
    self.add_loss(entr_loss)

    # Pooling: X' = S^T Z, A' = S^T A S.
    X_pooled = ops.matmul_AT_B(S, Z)
    A_pooled = ops.matmul_AT_B_A(S, A)
    # A rank-3 pooled adjacency means the layer ran in mixed mode.
    if K.ndim(A_pooled) == 3:
        self.mixed_mode = True
    output = [X_pooled, A_pooled]

    if I is not None:
        # Pool the graph-index vector: each graph keeps self.k entries.
        # NOTE(review): tf.segment_mean is the TF1-era alias of
        # tf.math.segment_mean — confirm the TF version this targets.
        I_mean = tf.segment_mean(I, I)
        I_pooled = ops.repeat(I_mean, tf.ones_like(I_mean) * self.k)
        output.append(I_pooled)

    if self.return_mask:
        output.append(S)

    return output