Exemple #1
0
    def __call__(self, z, train: bool = True):
        """Decode a latent vector through five transposed-convolution layers.

        Args:
            z: Latent input. It is reshaped to ``(1, 1, self.zdim)`` before
                the first layer, so it must hold exactly ``self.zdim`` values.
            train: Every ``BatchNorm`` is built with
                ``use_running_average=not train``.

        Returns:
            The softplus-activated output of the last layer with singleton
            axes squeezed out, rotated by 180 degrees.
        """
        # Settings shared by the stride-2 transposed convolutions.
        upsample = dict(
            kernel_size=(4, 4),
            strides=(2, 2),
            padding='SAME',
            use_bias=False,
            kernel_init=he_normal(),
        )
        batch_norm = dict(
            use_running_average=not train,
            momentum=0.99,
            epsilon=0.001,
            use_scale=True,
            use_bias=True,
        )

        h = np.reshape(z, (1, 1, self.zdim))

        # First layer: stride-1 transposed convolution with 'VALID' padding.
        h = nn.ConvTranspose(
            features=512,
            kernel_size=(4, 4),
            strides=(1, 1),
            padding='VALID',
            use_bias=False,
            kernel_init=he_normal(),
        )(h)
        h = nn.leaky_relu(nn.BatchNorm(**batch_norm)(h), 0.2)

        # Layers 2-4 only differ in their number of output features.
        for width in (256, 128, 64):
            h = nn.ConvTranspose(features=width, **upsample)(h)
            h = nn.BatchNorm(**batch_norm)(h)
            h = nn.leaky_relu(h, 0.2)

        # Output layer: single feature map, Xavier-normal initialised kernel,
        # no batch normalisation.
        head = {**upsample, 'kernel_init': nn.initializers.xavier_normal()}
        x = nn.softplus(nn.ConvTranspose(features=1, **head)(h))

        # Rotate to match TF output.
        return jnp.rot90(np.squeeze(x), k=2)
Exemple #2
0
    def __call__(self, x, train: bool = True):
        """Encode an input into two ``self.zdim``-sized latent predictions.

        Args:
            x: Input array; a trailing channel axis is appended before the
                first convolution.
            train: Every ``BatchNorm`` is built with
                ``use_running_average=not train``.

        Returns:
            A ``(z_mean, z_logvar)`` tuple, each produced by its own dense
            layer applied to the flattened convolutional features.
        """
        # Settings shared by the stride-2 convolutions.
        downsample = dict(
            kernel_size=(4, 4),
            strides=(2, 2),
            padding='SAME',
            use_bias=False,
            kernel_init=he_normal(),
        )

        # Append the channel axis expected by the convolutions.
        h = x[..., None]

        # First layer has no batch normalisation.
        h = nn.leaky_relu(nn.Conv(features=64, **downsample)(h), 0.2)

        # Layers 2-4 only differ in their number of output features.
        for width in (128, 256, 512):
            h = nn.Conv(features=width, **downsample)(h)
            h = nn.BatchNorm(use_running_average=not train)(h)
            h = nn.leaky_relu(h, 0.2)

        # Last convolution: stride 1, 'VALID' padding, no batch normalisation.
        h = nn.Conv(
            features=4096,
            kernel_size=(4, 4),
            strides=(1, 1),
            padding='VALID',
            use_bias=False,
            kernel_init=he_normal(),
        )(h)
        h = nn.leaky_relu(h, 0.2)

        # Collapse everything into a single feature vector.
        flat = h.flatten()

        # Two independent dense heads (mean first, then log-variance).
        z_mean = nn.Dense(features=self.zdim)(flat)
        z_logvar = nn.Dense(features=self.zdim)(flat)
        return z_mean, z_logvar
Exemple #3
0
    def __call__(self, inputs, use_running_stats=None):
        """Normalize and activate each spin channel with spin-specific modules.

        The second-to-last axis of `inputs` is indexed by position in
        `self.spins`. Each slice is batch-normalized and passed through a
        nonlinearity, and the results are concatenated along that same axis.

        Args:
            inputs: Input array; its second-to-last axis must have one entry
                per element of `self.spins`.
            use_running_stats: Optional override, merged with
                `self.use_running_stats` through `nn.module.merge_param`.

        Returns:
            The per-spin outputs concatenated along axis -2.
        """
        use_running_stats = nn.module.merge_param("use_running_stats",
                                                  self.use_running_stats,
                                                  use_running_stats)

        # Keyword arguments common to every batch-normalization instance.
        shared = {
            "use_running_stats": use_running_stats,
            "momentum": self.momentum,
            "epsilon": self.epsilon,
            "axis_name": self.axis_name,
        }

        per_spin = []
        for idx, spin in enumerate(self.spins):
            # Indexing with a list keeps the spin axis (length 1).
            channel = inputs[..., [idx], :]
            if spin != 0:
                # Nonzero spin: uncentered, bias-free normalization followed
                # by the magnitude nonlinearity.
                normalized = SphericalBatchNormalization(
                    use_bias=False, centered=False, **shared)(channel)
                activated = MagnitudeNonlinearity(
                    bias_initializer=self.bias_initializer,
                    name=f"magnitude_nonlin_{idx}")(normalized)
            else:
                # Spin zero: centered normalization with bias, then a leaky
                # ReLU on the real part only.
                normalized = SphericalBatchNormalization(
                    use_bias=True, centered=True, **shared)(channel)
                activated = nn.leaky_relu(normalized.real)
            per_spin.append(activated)

        return jnp.concatenate(per_spin, axis=-2)
    def test_spin0_matches_relu(self):
        """Checks the spin-0 channel against leaky_relu and the rest against `outputs`."""
        inputs, outputs, outputs_relu = _evaluate_magnitudenonlinearity_versions(
            [0, -1, 2])

        # Channels 1 and up (the nonzero spins) must be identical in both
        # outputs returned by the helper.
        nonzero_expected = outputs_relu[..., 1:, :]
        self.assertAllEqual(outputs[..., 1:, :], nonzero_expected)

        # Channel 0 (spin zero) must equal leaky_relu of the real part of the
        # corresponding input channel.
        spin0_expected = nn.leaky_relu(inputs[..., 0, :].real)
        self.assertAllEqual(outputs_relu[..., 0, :], spin0_expected)
Exemple #5
0
    def __call__(self, inputs):
        """Applies pointwise nonlinearity to 5D inputs.

        Both candidate nonlinearities are evaluated on the full input and the
        result is selected per spin channel: leaky ReLU of the real part where
        the spin is zero, `MagnitudeNonlinearity` everywhere else.
        """
        # Spin labels reshaped to broadcast along axis 3 of the 5D input.
        spin_labels = jnp.expand_dims(jnp.array(self.spins), (0, 1, 2, 4))

        # In [1], the spin0 inputs are cast to real at every layer; here the
        # cast is folded into the nonlinearity.
        real_branch = nn.leaky_relu(inputs.real)

        magnitude_branch = MagnitudeNonlinearity(
            self.epsilon,
            self.bias_initializer,
            name="magnitude_nonlin")(inputs)

        return jnp.where(spin_labels == 0, real_branch, magnitude_branch)
    def __call__(self, inputs):
        """Applies pointwise nonlinearity to 5D inputs.

        Each slice along the second-to-last axis is handled according to the
        matching entry of `self.spins`, and the results are concatenated back
        along that axis.
        """
        activated = []
        for idx, spin in enumerate(self.spins):
            # Indexing with a list keeps the spin axis (length 1).
            channel = inputs[..., [idx], :]
            if spin != 0:
                result = MagnitudeNonlinearity(
                    self.epsilon,
                    self.bias_initializer,
                    name=f"magnitude_nonlin_{idx}")(channel)
            else:
                # In [1], the spin0 inputs are cast to real at every layer;
                # the cast is folded into the nonlinearity here.
                result = nn.leaky_relu(channel.real)
            activated.append(result)

        return jnp.concatenate(activated, axis=-2)
Exemple #7
0
    def __call__(self, z):
        """Return one plus the exponential of the two-layer projection of ``z``.

        ``z`` goes through ``self.lyr1`` and a leaky ReLU, then through
        ``self.lyrn``; the result is exponentiated and shifted by 1.0.
        """
        hidden = nn.leaky_relu(self.lyr1(z))
        # The exponential (rather than a softplus) maps the output of
        # ``lyrn`` to a positive value before the shift.
        return 1.0 + jnp.exp(self.lyrn(hidden))
Exemple #8
0
    def __call__(self, graph, feat, get_attention=False):
        r"""

        Description
        -----------
        Compute graph attention network layer.

        Parameters
        ----------
        graph : DGLGraph
            The graph.
        feat : array or pair of arrays
            If a single array is given, the input feature of shape :math:`(N, D_{in})` where
            :math:`D_{in}` is size of input feature, :math:`N` is the number of nodes.
            If a pair of arrays is given, the pair must contain two arrays of shape
            :math:`(N_{in}, D_{in_{src}})` and :math:`(N_{out}, D_{in_{dst}})`.
        get_attention : bool, optional
            Whether to return the attention values. Default to False.

        Returns
        -------
        array
            The output feature of shape :math:`(N, H, D_{out})` where :math:`H`
            is the number of heads, and :math:`D_{out}` is size of output feature.
        array, optional
            The attention values of shape :math:`(E, H, 1)`, where :math:`E` is the number of
            edges. This is returned only when :attr:`get_attention` is ``True``.

        Raises
        ------
        DGLError
            If there are 0-in-degree nodes in the input graph, it will raise DGLError
            since no message will be passed to those nodes. This will cause invalid output.
            The error can be ignored by setting ``allow_zero_in_degree`` parameter to ``True``.
        """
        with graph.local_scope():
            # Refuse graphs with isolated destination nodes unless the caller
            # explicitly opted out of the check.
            if not self.allow_zero_in_degree and (graph.in_degrees() == 0).any():
                raise DGLError(
                    'There are 0-in-degree nodes in the graph, '
                    'output for those nodes will be invalid. '
                    'This is harmful for some applications, '
                    'causing silent performance regression. '
                    'Adding self-loop on the input graph by '
                    'calling `g = dgl.add_self_loop(g)` will resolve '
                    'the issue. Setting ``allow_zero_in_degree`` '
                    'to be `True` when constructing this module will '
                    'suppress the check and let the code run.')

            head_shape = (-1, self.num_heads, self.out_feats)
            if isinstance(feat, tuple):
                # Separate source / destination features, each with its own
                # dropout and projection.
                h_src = nn.Dropout(self.feat_drop)(feat[0])
                h_dst = nn.Dropout(self.feat_drop)(feat[1])

                feat_src = self.fc_src(h_src).reshape(head_shape)
                feat_dst = self.fc_dst(h_dst).reshape(head_shape)
            else:
                # A single feature array serves both roles.
                h_src = h_dst = nn.Dropout(self.feat_drop)(feat)
                feat_src = feat_dst = self.fc(h_src).reshape(head_shape)
                if graph.is_block:
                    # In a block, only the leading rows are used as
                    # destination features.
                    feat_dst = feat_src[:graph.number_of_dst_nodes()]
            # NOTE: GAT paper uses "first concatenation then linear projection"
            # to compute attention scores, while ours is "first projection then
            # addition", the two approaches are mathematically equivalent:
            # We decompose the weight vector a mentioned in the paper into
            # [a_l || a_r], then
            # a^T [Wh_i || Wh_j] = a_l Wh_i + a_r Wh_j
            # Our implementation is much efficient because we do not need to
            # save [Wh_i || Wh_j] on edges, which is not memory-efficient. Plus,
            # addition could be optimized with DGL's built-in function u_add_v,
            # which further speeds up computation and saves memory footprint.
            el = jnp.expand_dims((feat_src * self.attn_l).sum(axis=-1), -1)
            er = jnp.expand_dims((feat_dst * self.attn_r).sum(axis=-1), -1)
            graph.srcdata.update({'ft': feat_src, 'el': el})
            graph.dstdata.update({'er': er})
            # compute edge attention, el and er are a_l Wh_i and a_r Wh_j respectively.
            graph.apply_edges(fn.u_add_v('el', 'er', 'e'))
            # NOTE(review): the attribute is spelled `negative_slop` here;
            # confirm it matches the field declared on the class.
            e = nn.leaky_relu(graph.edata.pop('e'), self.negative_slop)
            # compute softmax
            graph.edata['a'] = nn.Dropout(self.attn_drop)(edge_softmax(
                graph, e))
            # message passing
            graph.update_all(fn.u_mul_e('ft', 'a', 'm'), fn.sum('m', 'ft'))
            rst = graph.dstdata['ft']
            # residual
            if self.res_fc is not None:
                # Use `self.out_feats`, the same attribute as the projections
                # above; the former `self._out_feats` was inconsistent with
                # the rest of this method.
                resval = self.res_fc(h_dst).reshape(
                    (h_dst.shape[0], -1, self.out_feats))
                rst = rst + resval
            # activation
            if self.activation:
                rst = self.activation(rst)

            if get_attention:
                return rst, graph.edata['a']
            return rst