Example #1
    def __init__(
        self,
        in_units: int,
        hidden_units: int,
        out_units: int,
        num_hidden_layers: int = 2,
        activation: str = "lipswish",
        jacobian_method: str = "bf",
        num_power_iter: int = 1,
        coeff: float = 0.9,
        flatten: bool = False,
    ):
        super().__init__()
        self._in_units = in_units
        self._hidden_units = hidden_units
        self._out_units = out_units
        self._num_hidden_layers = num_hidden_layers
        self._activation = activation
        self._jacobian_method = jacobian_method
        self._num_power_iter = num_power_iter
        self._coeff = coeff
        self._weight_initializer = mx.init.Orthogonal(scale=self._coeff)
        self._bias_initializer = "zeros"
        self._flatten = flatten
        self._cached_inputs: List[Tensor] = []

        in_dim = self._in_units
        with self.name_scope():
            self._layers: List[mx.gluon.HybridBlock] = []
            for i in range(self._num_hidden_layers):
                lin = SNDense(
                    self._hidden_units,
                    in_units=in_dim,
                    activation=None,
                    num_power_iter=self._num_power_iter,
                    weight_initializer=self._weight_initializer,
                    bias_initializer=self._bias_initializer,
                    coeff=self._coeff,
                    flatten=self._flatten,
                )
                act = get_activation(self._activation,
                                     prefix=self._activation + str(i))
                in_dim = self._hidden_units
                self.register_child(lin)
                self.register_child(act)
                self._layers += [lin, act]

            last_lin = SNDense(
                self._out_units,
                in_units=in_dim,
                activation=None,
                num_power_iter=self._num_power_iter,
                weight_initializer=self._weight_initializer,
                bias_initializer=self._bias_initializer,
                coeff=self._coeff,
                flatten=self._flatten,
            )
            self.register_child(last_lin)
            self._layers += [last_lin]
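
The constructor above only builds and registers the alternating SNDense / activation layers; the snippet does not show how they are consumed, and the cached inputs list is presumably kept for later reuse (e.g. by an inverse or Jacobian computation). Below is a self-contained toy analogue of the same construction pattern, a sketch only: plain mx.gluon.nn.Dense with tanh stands in for SNDense with LipSwish, the layers are applied sequentially in hybrid_forward, and no input caching is done.

import mxnet as mx
from mxnet.gluon import nn


class DenseStack(mx.gluon.HybridBlock):
    # Toy analogue of the constructor above: Dense + tanh instead of
    # SNDense + LipSwish, applied in order in hybrid_forward.
    def __init__(self, in_units, hidden_units, out_units, num_hidden_layers=2):
        super().__init__()
        in_dim = in_units
        with self.name_scope():
            self._layers = []
            for _ in range(num_hidden_layers):
                lin = nn.Dense(hidden_units, in_units=in_dim)
                act = nn.Activation("tanh")
                in_dim = hidden_units
                self.register_child(lin)
                self.register_child(act)
                self._layers += [lin, act]
            last = nn.Dense(out_units, in_units=in_dim)
            self.register_child(last)
            self._layers += [last]

    def hybrid_forward(self, F, x):
        # Apply the registered linear / activation blocks sequentially.
        for layer in self._layers:
            x = layer(x)
        return x


net = DenseStack(in_units=8, hidden_units=16, out_units=8)
net.initialize()
out = net(mx.nd.random.randn(4, 8))  # shape (4, 8)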
Example #2
# Note: get_activation / get_activation_deriv and the pytest parametrization that
# supplies (activation, kwargs) come from the surrounding project and are not shown here.
import mxnet as mx
import numpy as np
from mxnet import autograd


def test_activation_deriv(activation, kwargs):
    def get_deriv_autograd(input, act):
        input.attach_grad()
        with autograd.record():
            output = act(input)
        return autograd.grad(output, [input], create_graph=True)[0]

    input = mx.nd.random.randn(500, 20)
    act = get_activation(activation, **kwargs)
    act.initialize()
    correct_deriv = get_deriv_autograd(input, act)
    act_deriv = get_activation_deriv(act)
    output_deriv = act_deriv(mx.ndarray, input)

    assert all([
        np.allclose(out, corr, atol=5.0e-8) for out, corr in zip(
            output_deriv.T.asnumpy(), correct_deriv.T.asnumpy())
    ])
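
The test checks the closed-form derivative returned by get_activation_deriv against MXNet autograd. A minimal, self-contained sketch of the same autograd comparison, using tanh (whose derivative 1 - tanh(x)^2 is known in closed form) in place of one of the project's activations:

import mxnet as mx
import numpy as np
from mxnet import autograd

x = mx.nd.random.randn(500, 20)
x.attach_grad()
with autograd.record():
    y = mx.nd.tanh(x)
# Gradient of sum(y) w.r.t. x, which for an elementwise op equals dy/dx elementwise.
dy_dx = autograd.grad(y, [x], create_graph=False)[0]

# Closed-form derivative of tanh: 1 - tanh(x)^2.
expected = 1 - mx.nd.tanh(x) ** 2
assert np.allclose(dy_dx.asnumpy(), expected.asnumpy(), atol=1e-6)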
Example #3
    def __init__(
            self,
            units: int,
            in_units: int,
            coeff: float = 0.9,
            activation: Optional[str] = None,
            use_bias: bool = True,
            flatten: bool = True,
            weight_initializer: init.Initializer = init.Orthogonal(scale=0.9),
            bias_initializer="zeros",
            dtype="float32",
            num_power_iter: int = 1,
            ctx: Optional[mx.Context] = None,
            **kwargs):
        super().__init__(**kwargs)
        self._coeff = coeff
        self._flatten = flatten
        self._ctx = ctx if ctx is not None else get_mxnet_context()
        self._num_power_iter = num_power_iter
        with self.name_scope():
            self._units = units
            self._in_units = in_units
            self._weight = self.params.get(
                "weight",
                shape=(units, in_units),
                init=weight_initializer,
                dtype=dtype,
            )
            self._u = self.params.get("u",
                                      init=mx.init.Normal(),
                                      shape=(1, units))

            if use_bias:
                self._bias = self.params.get("bias",
                                             shape=(units, ),
                                             init=bias_initializer,
                                             dtype=dtype)
            else:
                self._bias = None

            if activation is not None:
                self._act = get_activation(activation, prefix=activation + "_")
            else:
                self._act = None
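
The "u" parameter of shape (1, units) is presumably the running estimate of the dominant left singular vector used by power iteration to approximate the spectral norm of the (units, in_units) weight, so that the layer can be rescaled to keep its Lipschitz constant at or below coeff. A minimal, self-contained sketch of one such power-iteration step follows; it is a generic illustration of the technique, not the SNDense implementation itself.

import mxnet as mx

# Estimate the spectral norm of a (units, in_units) weight with power iteration
# and rescale it so the induced Lipschitz constant stays at or below coeff.
units, in_units, coeff, num_power_iter = 16, 8, 0.9, 1
W = mx.nd.random.randn(units, in_units)
u = mx.nd.random.normal(shape=(1, units))  # persistent left-singular-vector estimate

for _ in range(num_power_iter):
    v = mx.nd.L2Normalization(mx.nd.dot(u, W))                    # (1, in_units)
    u = mx.nd.L2Normalization(mx.nd.dot(v, W, transpose_b=True))  # (1, units)

sigma = mx.nd.sum(mx.nd.dot(u, W) * v).asscalar()  # ~ largest singular value of W
W_scaled = W * min(1.0, coeff / sigma)             # spectral norm of W_scaled <= ~coeff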