def call(self, X):
    # Project the input tokens into a 64-dimensional embedding space.
    X = elegy.nn.Linear(64)(X)
    X = jax.nn.relu(X)
    X = elegy.nn.Linear(64)(X)
    # Run a 3-layer transformer encoder over the sequence.
    X = TransformerEncoder(
        encoder_layer=lambda: TransformerEncoderLayer(
            head_size=16,
            num_heads=10,
            output_size=64,
            dropout=0.0,
            activation=jax.nn.relu,
        ),
        num_layers=3,
        norm=lambda: elegy.nn.LayerNormalization(),
    )(X)
    # Keep only the first token of each sequence (CLS-style pooling).
    X = X[:, 0]
    elegy.add_summary("get_first", X)
    # X = elegy.nn.Linear(96)(X)
    # X = jax.nn.relu(X)
    # X = elegy.nn.LayerNormalization()(X)
    # X = elegy.nn.Linear(256)(X)
    # X = jax.nn.relu(X)
    # X = elegy.nn.LayerNormalization()(X)
    # X = elegy.nn.Dropout(0.3)(X)
    # Classification head: per-class probabilities.
    X = elegy.nn.Linear(self.num_classes_y)(X)
    X = jax.nn.softmax(X)
    return X
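# A minimal training sketch (assumptions, not part of the original code): it
# presumes the call method above lives in an `elegy.Module` subclass named
# `TransformerClassifier` with a `num_classes_y` attribute, that labels are
# integer class ids, and that `optax` provides the optimizer. Since the module
# already applies `jax.nn.softmax`, the loss is used with `from_logits=False`.
import numpy as np
import optax

import elegy

# Dummy data purely to make the sketch self-contained:
# (batch, sequence_length, features) inputs and integer labels.
X_train = np.random.uniform(size=(256, 16, 8)).astype(np.float32)
y_train = np.random.randint(0, 10, size=(256,))

model = elegy.Model(
    module=TransformerClassifier(num_classes_y=10),  # hypothetical wrapper class
    loss=elegy.losses.SparseCategoricalCrossentropy(from_logits=False),
    metrics=elegy.metrics.SparseCategoricalAccuracy(),
    optimizer=optax.adam(1e-3),
)

model.fit(x=X_train, y=y_train, epochs=10, batch_size=32)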
def call(self, z: np.ndarray) -> np.ndarray:
    # Map the latent vector back up to the hidden size.
    z = elegy.nn.Linear(self.hidden_size)(z)
    z = jax.nn.relu(z)
    elegy.add_summary("relu", z)
    # Produce one logit per output element and reshape to the target shape.
    logits = elegy.nn.Linear(int(np.prod(self.output_shape)))(z)
    logits = jnp.reshape(logits, (-1, *self.output_shape))
    return logits
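# A possible wrapper for the decoder above (a sketch, not the original class
# definition): the `hidden_size` and `output_shape` defaults are assumptions,
# chosen to match an MNIST-style 28x28 reconstruction. The imports cover the
# modules these snippets rely on.
import typing as tp

import jax
import jax.numpy as jnp
import numpy as np

import elegy


class Decoder(elegy.Module):
    def __init__(
        self,
        hidden_size: int = 512,
        output_shape: tp.Sequence[int] = (28, 28),
        **kwargs,
    ):
        super().__init__(**kwargs)
        self.hidden_size = hidden_size
        self.output_shape = tuple(output_shape)

    # `call` is the decoder method shown above.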
def call(self, x: np.ndarray) -> np.ndarray:
    x = elegy.nn.Flatten()(x)
    x = elegy.nn.Linear(self.hidden_size)(x)
    x = jax.nn.relu(x)
    elegy.add_summary("relu", x)
    # Predict the mean and (log) standard deviation of the latent Gaussian.
    mean = elegy.nn.Linear(self.latent_size, name="linear_mean")(x)
    log_stddev = elegy.nn.Linear(self.latent_size, name="linear_std")(x)
    stddev = jnp.exp(log_stddev)
    # Regularize the latent distribution towards a standard normal.
    elegy.add_loss("kl_divergence", KLDivergence(weight=2e-1)(mean, stddev))
    # Reparameterization trick: sample z = mean + stddev * epsilon.
    z = mean + stddev * jax.random.normal(elegy.next_rng_key(), mean.shape)
    return z
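# The `KLDivergence` loss used above is not defined in this snippet. A minimal
# sketch, assuming it subclasses `elegy.Loss` (which supplies the `weight`
# argument and reduction) and computes the closed-form KL divergence between
# the diagonal Gaussian N(mean, stddev^2) and a standard normal, averaged over
# the latent dimensions:
class KLDivergence(elegy.Loss):
    def call(self, mean: jnp.ndarray, std: jnp.ndarray) -> jnp.ndarray:
        # KL(N(mean, std^2) || N(0, 1)) = 0.5 * (std^2 + mean^2 - 1 - log std^2)
        return 0.5 * jnp.mean(std**2 + mean**2 - 1.0 - jnp.log(std**2), axis=-1)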