def __call__(self, z, train: bool = True):
    # Common arguments
    conv_kwargs = {
        'kernel_size': (4, 4),
        'strides': (2, 2),
        'padding': 'SAME',
        'use_bias': False,
        'kernel_init': he_normal()
    }
    norm_kwargs = {
        'use_running_average': not train,
        'momentum': 0.99,
        'epsilon': 0.001,
        'use_scale': True,
        'use_bias': True
    }

    z = jnp.reshape(z, (1, 1, self.zdim))

    # Layer 1
    z = nn.ConvTranspose(features=512, kernel_size=(4, 4), strides=(1, 1),
                         padding='VALID', use_bias=False,
                         kernel_init=he_normal())(z)
    z = nn.BatchNorm(**norm_kwargs)(z)
    z = nn.leaky_relu(z, 0.2)

    # Layer 2
    z = nn.ConvTranspose(features=256, **conv_kwargs)(z)
    z = nn.BatchNorm(**norm_kwargs)(z)
    z = nn.leaky_relu(z, 0.2)

    # Layer 3
    z = nn.ConvTranspose(features=128, **conv_kwargs)(z)
    z = nn.BatchNorm(**norm_kwargs)(z)
    z = nn.leaky_relu(z, 0.2)

    # Layer 4
    z = nn.ConvTranspose(features=64, **conv_kwargs)(z)
    z = nn.BatchNorm(**norm_kwargs)(z)
    z = nn.leaky_relu(z, 0.2)

    # Layer 5
    z = nn.ConvTranspose(features=1, kernel_size=(4, 4), strides=(2, 2),
                         padding='SAME', use_bias=False,
                         kernel_init=nn.initializers.xavier_normal())(z)

    # x = nn.sigmoid(z)
    x = nn.softplus(z)

    return jnp.rot90(jnp.squeeze(x), k=2)  # Rotate to match TF output
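
# Hedged sketch (not part of the original source): Flax modules that use
# nn.BatchNorm, like the decoder above, keep running statistics in the
# `batch_stats` collection, which must be marked mutable when train=True.
# `_TinyBatchNormModel` below is a minimal stand-in used only to show the
# init/apply pattern; it is not the decoder itself.
import jax
import jax.numpy as jnp
from flax import linen as nn


class _TinyBatchNormModel(nn.Module):
    @nn.compact
    def __call__(self, x, train: bool = True):
        x = nn.Dense(features=8)(x)
        x = nn.BatchNorm(use_running_average=not train)(x)
        return nn.leaky_relu(x, 0.2)


model = _TinyBatchNormModel()
variables = model.init(jax.random.PRNGKey(0), jnp.ones((4, 16)), train=False)
# During training, allow the running statistics to be updated:
y, updates = model.apply(variables, jnp.ones((4, 16)), train=True,
                         mutable=['batch_stats'])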
def __call__(self, x, train: bool = True):
    # Common arguments
    kwargs = {
        'kernel_size': (4, 4),
        'strides': (2, 2),
        'padding': 'SAME',
        'use_bias': False,
        'kernel_init': he_normal()
    }

    # x = np.reshape(x, (64, 64, 1))
    x = x[..., None]

    # Layer 1
    x = nn.Conv(features=64, **kwargs)(x)
    x = nn.leaky_relu(x, 0.2)

    # Layer 2
    x = nn.Conv(features=128, **kwargs)(x)
    x = nn.BatchNorm(use_running_average=not train)(x)
    x = nn.leaky_relu(x, 0.2)

    # Layer 3
    x = nn.Conv(features=256, **kwargs)(x)
    x = nn.BatchNorm(use_running_average=not train)(x)
    x = nn.leaky_relu(x, 0.2)

    # Layer 4
    x = nn.Conv(features=512, **kwargs)(x)
    x = nn.BatchNorm(use_running_average=not train)(x)
    x = nn.leaky_relu(x, 0.2)

    # Layer 5
    x = nn.Conv(features=4096, kernel_size=(4, 4), strides=(1, 1),
                padding='VALID', use_bias=False, kernel_init=he_normal())(x)
    x = nn.leaky_relu(x, 0.2)

    # Flatten
    x = x.flatten()

    # Predict latent variables
    z_mean = nn.Dense(features=self.zdim)(x)
    z_logvar = nn.Dense(features=self.zdim)(x)

    return z_mean, z_logvar
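
# Hedged sketch (assumption, not from the original source): how the encoder's
# (z_mean, z_logvar) pair is typically consumed. A VAE samples the latent via
# the reparameterization trick so gradients flow through the encoder.
import jax
import jax.numpy as jnp


def reparameterize(rng, z_mean, z_logvar):
    # z = mean + sigma * eps, with sigma = exp(0.5 * logvar)
    eps = jax.random.normal(rng, z_mean.shape)
    return z_mean + jnp.exp(0.5 * z_logvar) * eps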
def __call__(self, inputs, use_running_stats=None):
    """Calls appropriate batch normalization and nonlinearity per spin."""
    use_running_stats = nn.module.merge_param(
        "use_running_stats", self.use_running_stats, use_running_stats)
    options = dict(use_running_stats=use_running_stats,
                   momentum=self.momentum,
                   epsilon=self.epsilon,
                   axis_name=self.axis_name)

    outputs = []
    for i, spin in enumerate(self.spins):
        inputs_spin = inputs[Ellipsis, [i], :]
        if spin == 0:
            outputs_spin = SphericalBatchNormalization(
                use_bias=True, centered=True, **options)(inputs_spin)
            outputs_spin = nn.leaky_relu(outputs_spin.real)
        else:
            outputs_spin = SphericalBatchNormalization(
                use_bias=False, centered=False, **options)(inputs_spin)
            outputs_spin = MagnitudeNonlinearity(
                bias_initializer=self.bias_initializer,
                name=f"magnitude_nonlin_{i}")(outputs_spin)
        outputs.append(outputs_spin)

    return jnp.concatenate(outputs, axis=-2)
def test_spin0_matches_relu(self):
    """Zero spin matches real leaky_relu, others match MagnitudeNonlinearity."""
    spins = [0, -1, 2]
    inputs, outputs, outputs_relu = _evaluate_magnitudenonlinearity_versions(
        spins)

    self.assertAllEqual(outputs[Ellipsis, 1:, :],
                        outputs_relu[Ellipsis, 1:, :])
    self.assertAllEqual(outputs_relu[Ellipsis, 0, :],
                        nn.leaky_relu(inputs[Ellipsis, 0, :].real))
def __call__(self, inputs):
    """Applies pointwise nonlinearity to 5D inputs."""
    magnitude = MagnitudeNonlinearity(self.epsilon,
                                      self.bias_initializer,
                                      name="magnitude_nonlin")(inputs)
    # In [1], the spin0 inputs are cast to real at every layer. We choose to
    # merge this operation with the nonlinearity.
    relu = nn.leaky_relu(inputs.real)
    spins = jnp.expand_dims(jnp.array(self.spins), [0, 1, 2, 4])
    return jnp.where(spins == 0, relu, magnitude)
def __call__(self, inputs):
    """Applies pointwise nonlinearity to 5D inputs."""
    outputs = []
    for i, spin in enumerate(self.spins):
        inputs_spin = inputs[Ellipsis, [i], :]
        if spin == 0:
            # In [1], the spin0 inputs are cast to real at every layer.
            # Here we merge this operation with the nonlinearity.
            outputs_spin = nn.leaky_relu(inputs_spin.real)
        else:
            outputs_spin = MagnitudeNonlinearity(
                self.epsilon,
                self.bias_initializer,
                name=f"magnitude_nonlin_{i}")(inputs_spin)
        outputs.append(outputs_spin)
    return jnp.concatenate(outputs, axis=-2)
def __call__(self, z):
    h1 = nn.leaky_relu(self.lyr1(z))
    # λ = 1.0 + nn.softplus(self.lyrn(h1))
    λ = 1.0 + jnp.exp(self.lyrn(h1))
    return λ
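
# Hedged note (assumption, not from the source): both parameterizations keep
# λ strictly above 1, but they differ in scale and numerical behaviour.
# 1 + exp(u) grows exponentially and can overflow for large activations,
# while 1 + softplus(u) = 1 + log(1 + exp(u)) grows roughly linearly.
import jax
import jax.numpy as jnp

u = jnp.array([-5.0, 0.0, 5.0])
lam_exp = 1.0 + jnp.exp(u)            # ≈ [1.007, 2.000, 149.41]
lam_softplus = 1.0 + jax.nn.softplus(u)  # ≈ [1.007, 1.693, 6.007]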
def __call__(self, graph, feat, get_attention=False):
    r"""

    Description
    -----------
    Compute graph attention network layer.

    Parameters
    ----------
    graph : DGLGraph
        The graph.
    feat : torch.Tensor or pair of torch.Tensor
        If a torch.Tensor is given, the input feature of shape :math:`(N, D_{in})`
        where :math:`D_{in}` is the size of the input feature and :math:`N` is
        the number of nodes. If a pair of torch.Tensor is given, the pair must
        contain two tensors of shape :math:`(N_{in}, D_{in_{src}})` and
        :math:`(N_{out}, D_{in_{dst}})`.
    get_attention : bool, optional
        Whether to return the attention values. Defaults to ``False``.

    Returns
    -------
    torch.Tensor
        The output feature of shape :math:`(N, H, D_{out})` where :math:`H`
        is the number of heads and :math:`D_{out}` is the size of the output
        feature.
    torch.Tensor, optional
        The attention values of shape :math:`(E, H, 1)`, where :math:`E` is
        the number of edges. Returned only when :attr:`get_attention` is
        ``True``.

    Raises
    ------
    DGLError
        If there are 0-in-degree nodes in the input graph, a DGLError is
        raised, since no message will be passed to those nodes and the output
        would be invalid. The error can be suppressed by setting the
        ``allow_zero_in_degree`` parameter to ``True``.
    """
    with graph.local_scope():
        if not self.allow_zero_in_degree:
            if (graph.in_degrees() == 0).any():
                raise DGLError(
                    'There are 0-in-degree nodes in the graph, '
                    'output for those nodes will be invalid. '
                    'This is harmful for some applications, '
                    'causing silent performance regression. '
                    'Adding self-loop on the input graph by '
                    'calling `g = dgl.add_self_loop(g)` will resolve '
                    'the issue. Setting ``allow_zero_in_degree`` '
                    'to be `True` when constructing this module will '
                    'suppress the check and let the code run.')

        if isinstance(feat, tuple):
            h_src = nn.Dropout(self.feat_drop)(feat[0])
            h_dst = nn.Dropout(self.feat_drop)(feat[1])
            feat_src = self.fc_src(h_src).reshape(
                (-1, self.num_heads, self.out_feats))
            feat_dst = self.fc_dst(h_dst).reshape(
                (-1, self.num_heads, self.out_feats))
        else:
            h_src = h_dst = nn.Dropout(self.feat_drop)(feat)
            feat_src = feat_dst = self.fc(h_src).reshape(
                (-1, self.num_heads, self.out_feats))
            if graph.is_block:
                feat_dst = feat_src[:graph.number_of_dst_nodes()]

        # NOTE: The GAT paper uses "first concatenation then linear projection"
        # to compute attention scores, while ours is "first projection then
        # addition"; the two approaches are mathematically equivalent:
        # decompose the weight vector a mentioned in the paper into
        # [a_l || a_r], then
        #   a^T [Wh_i || Wh_j] = a_l Wh_i + a_r Wh_j
        # Our implementation is more efficient because we do not need to
        # save [Wh_i || Wh_j] on edges, which is not memory-friendly. Plus,
        # the addition can be optimized with DGL's built-in function u_add_v,
        # which further speeds up computation and saves memory.
        el = jnp.expand_dims((feat_src * self.attn_l).sum(axis=-1), -1)
        er = jnp.expand_dims((feat_dst * self.attn_r).sum(axis=-1), -1)
        graph.srcdata.update({'ft': feat_src, 'el': el})
        graph.dstdata.update({'er': er})
        # Compute edge attention; el and er are a_l Wh_i and a_r Wh_j respectively.
        graph.apply_edges(fn.u_add_v('el', 'er', 'e'))
        e = nn.leaky_relu(graph.edata.pop('e'), self.negative_slope)

        # Compute softmax over incoming edges to get attention coefficients.
        graph.edata['a'] = nn.Dropout(self.attn_drop)(edge_softmax(graph, e))

        # Message passing.
        graph.update_all(fn.u_mul_e('ft', 'a', 'm'), fn.sum('m', 'ft'))
        rst = graph.dstdata['ft']

        # Residual connection.
        if self.res_fc is not None:
            resval = self.res_fc(h_dst).reshape(
                (h_dst.shape[0], -1, self.out_feats))
            rst = rst + resval

        # Activation.
        if self.activation:
            rst = self.activation(rst)

        if get_attention:
            return rst, graph.edata['a']
        else:
            return rst
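
# Hedged sketch (assumption, not from the source): a numerical check of the
# decomposition used in the NOTE above, a^T [Wh_i || Wh_j] = a_l Wh_i + a_r Wh_j,
# i.e. projecting then adding equals concatenating then projecting.
import jax
import jax.numpy as jnp

key = jax.random.PRNGKey(0)
k1, k2, k3 = jax.random.split(key, 3)
wh_i = jax.random.normal(k1, (8,))   # projected source feature Wh_i
wh_j = jax.random.normal(k2, (8,))   # projected destination feature Wh_j
a = jax.random.normal(k3, (16,))     # attention vector a = [a_l || a_r]
a_l, a_r = a[:8], a[8:]

concat_then_project = jnp.dot(a, jnp.concatenate([wh_i, wh_j]))
project_then_add = jnp.dot(a_l, wh_i) + jnp.dot(a_r, wh_j)
assert jnp.allclose(concat_then_project, project_then_add)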