Example No. 1
  def __call__(self,
               inputs,
               use_running_stats = None):
    """Normalizes the input using batch (optional) means and variances.

    Stats are computed over the batch and spherical dimensions: (0, 1, 2).

    Args:
      inputs: An array of dimensions (batch_size, resolution, resolution,
        n_spins_in, n_channels_in).
      use_running_stats: if true, the statistics stored in batch_stats will be
        used instead of computing the batch statistics on the input.

    Returns:
      Normalized inputs (the same shape as inputs).
    """
    use_running_stats = nn.module.merge_param(
        "use_running_stats", self.use_running_stats, use_running_stats)

    # Normalization is independent per spin per channel.
    num_spins, num_channels = inputs.shape[-2:]
    feature_shape = (1, 1, 1, num_spins, num_channels)
    reduced_feature_shape = (num_spins, num_channels)

    initializing = not self.has_variable("batch_stats", "variance")

    running_variance = self.variable("batch_stats", "variance",
                                     lambda s: jnp.ones(s, jnp.float32),
                                     reduced_feature_shape)

    if self.centered:
      running_mean = self.variable("batch_stats", "mean",
                                   lambda s: jnp.zeros(s, jnp.complex64),
                                   reduced_feature_shape)

    if use_running_stats:
      variance = running_variance.value
      if self.centered:
        mean = running_mean.value
    else:
      # Compute the spherical mean over the spherical grid dimensions, then a
      # conventional mean over the batch.
      if self.centered:
        mean = sphere_utils.spin_spherical_mean(inputs)
        mean = jnp.mean(mean, axis=0)
      # Complex variance is E[x x*] - E[x]E[x*].
      # For spin != 0, E[x] should be zero, although due to discretization this
      # is not always true. We only use E[x x*] here.
      # E[x x*]:
      mean_abs_squared = sphere_utils.spin_spherical_mean(inputs *
                                                          inputs.conj())
      mean_abs_squared = jnp.mean(mean_abs_squared, axis=0)
      # Aggregate means over devices.
      if self.axis_name is not None and not initializing:
        if self.centered:
          mean = lax.pmean(mean, axis_name=self.axis_name)
        mean_abs_squared = lax.pmean(mean_abs_squared, axis_name=self.axis_name)

      # Imaginary part is negligible.
      variance = mean_abs_squared.real

      if not initializing:
        running_variance.value = (self.momentum * running_variance.value +
                                  (1 - self.momentum) * variance)
        if self.centered:
          running_mean.value = (self.momentum * running_mean.value +
                                (1 - self.momentum) * mean)

    if self.centered:
      outputs = inputs - mean.reshape(feature_shape)
    else:
      outputs = inputs

    factor = lax.rsqrt(variance.reshape(feature_shape) + self.epsilon)
    if self.use_scale:
      scale = self.param("scale",
                         self.scale_init,
                         reduced_feature_shape).reshape(feature_shape)
      factor = factor * scale

    outputs = outputs * factor

    if self.use_bias:
      bias = self.param("bias",
                        self.bias_init,
                        reduced_feature_shape).reshape(feature_shape)
      outputs = outputs + bias

    return outputs
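
The `variance` above is the complex second moment E[x x*] reduced over the batch and spherical grid. A minimal standalone sketch of that reduction (using a plain uniform-grid mean instead of `sphere_utils.spin_spherical_mean`, so spherical quadrature weights are ignored here):

import jax.numpy as jnp

def complex_second_moment(x):
  # x: (batch, resolution, resolution, n_spins, n_channels), complex-valued.
  # E[x conj(x)] reduced over batch and grid dims; the result is real up to
  # numerical error, matching the `variance` used above.
  return jnp.mean(x * jnp.conj(x), axis=(0, 1, 2)).real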
Example No. 2

rotation_matrices = jax.vmap(jax.vmap(rotation_matrix))(
    trajectories["solution_values"])
rotation_matrices = jnp.einsum("ij,abjk", (rotation_matrix(problem.x0).T),
                               rotation_matrices)

epsilon_tensor = jnp.array([
    [[0, 0, 0], [0, 0, 1], [0, -1, 0]],
    [[0, 0, -1], [0, 0, 0], [1, 0, 0]],
    [[0, 1, 0], [-1, 0, 0], [0, 0, 0]],
])

delta_u = -0.5 * jnp.einsum("kij,abij->abk", epsilon_tensor, rotation_matrices)

cor = jnp.mean(delta_u**2, axis=0)

t_a = trajectories["time_values"][0]
t_t = jnp.arange(0.0, trajectories["time_values"][0][-1], 0.005)
plt.plot(t_a, cor[:, 0])
plt.plot(t_a, cor[:, 1])
plt.plot(t_a, cor[:, 2])

D = 1.0
plt.plot(
    t_t,
    1.0 / 6.0 - (5.0 / 12.0) * jnp.exp(-6.0 * D * t_t) +
    (1.0 / 4.0) * jnp.exp(-2.0 * D * t_t),
    label="theoretical",
)
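
The einsum above extracts the rotation vector u_k = -1/2 * eps_kij * R_ij from each rotation matrix. A quick standalone check on a small rotation about the z axis (a sketch that reuses the `epsilon_tensor` and `jnp` defined above):

theta = 1e-3
Rz = jnp.array([[jnp.cos(theta), -jnp.sin(theta), 0.0],
                [jnp.sin(theta), jnp.cos(theta), 0.0],
                [0.0, 0.0, 1.0]])
u = -0.5 * jnp.einsum("kij,ij->k", epsilon_tensor, Rz)
# u is (0, 0, sin(theta)), i.e. approximately (0, 0, theta) for small angles.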
Example No. 3
def LayerNorm(x, params, epsilon=1e-6, **unused_kwargs):  # pylint: disable=invalid-name
    (scale, bias) = params
    mean = np.mean(x, axis=-1, keepdims=True)
    variance = np.mean((x - mean)**2, axis=-1, keepdims=True)
    norm_inputs = (x - mean) / np.sqrt(variance + epsilon)
    return norm_inputs * scale + bias
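
A minimal usage sketch for the layer above (here `np` is taken to be `jax.numpy`, as in the surrounding snippets; shapes are illustrative):

import jax.numpy as np

x = np.arange(16.0).reshape(2, 8)      # (batch, features)
params = (np.ones(8), np.zeros(8))     # (scale, bias)
y = LayerNorm(x, params)               # per-row zero mean, unit variance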
Example No. 4
def loss(params, batch):
    inputs, targets = batch
    preds = predict(params, inputs)
    return -jnp.mean(jnp.sum(preds * targets, axis=1))
Example No. 5
def mlp_loss(params, x, y):
    probs = mlp_predict(params, x)
    loss = jnp.mean(cross_entropy(probs, y))
    return loss
Example No. 6
def cross_entropy_loss(logits, labels):
    return -jnp.mean(jnp.sum(onehot(labels) * logits, axis=-1))
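
`onehot` above is an assumed helper (e.g. Flax's `common_utils.onehot`, which takes the class count explicitly). A minimal stand-in sketch using `jax.nn.one_hot`:

import jax

def onehot(labels, num_classes=10):
  # Hypothetical default class count; the snippet above calls onehot(labels)
  # with the count baked in elsewhere.
  return jax.nn.one_hot(labels, num_classes)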
Example No. 7
 def loss_fn(params, batch):
   x, y = batch
   y_hat = model_apply(params, x)
   return jnp.mean(jnp.square(y_hat - y))
Example No. 8
def _acc_fn(logits, labels):
    """
    Classification accuracy of the model.
    """
    predicted_class = jnp.argmax(logits, axis=1)
    return jnp.mean(predicted_class == labels)
Example No. 9
def _reg_loss_fn(reg):
    return jnp.mean(reg)
Example No. 10
 def compute_loss(params, obs, act, returns):
     logp = get_policy(params, obs).log_prob(act)
     return jnp.mean(-(logp * returns))
Example No. 11
 def metrics(dist_params, priors, beta):
     kl_div = self.f.proba_dist.kl_divergence(dist_params, priors)
     return {
         'KLDivRegularizer/beta': beta,
         'KLDivRegularizer/kl_div': jnp.mean(kl_div)
     }
Example No. 12
def loss(params, batch, model_predict):
    """Calculate loss."""
    inputs, targets = batch
    preds = model_predict(params, inputs)
    return -np.mean(np.sum(preds * one_hot(targets, preds.shape[-1]), axis=-1))
Example No. 13
def neg_log_perplexity(batch, model_predictions):
    """Calculate negative log perplexity."""
    _, targets = batch
    hot_targets = one_hot(targets, model_predictions.shape[-1])
    return np.mean(np.sum(model_predictions * hot_targets, axis=-1))
Example No. 14
def accuracy(batch, model_predictions):
    """Calculate accuracy."""
    _, targets = batch
    predicted_class = np.argmax(model_predictions, axis=-1)
    return np.mean(predicted_class == targets)
Example No. 15
  def __call__(self, inputs, is_training, test_local_stats=False,
               scale=None, offset=None):
    """Connects the batch norm.

    Args:
      inputs: An array, where the data format is [..., C].
      is_training: Whether this is during training.
      test_local_stats: Whether local stats are used when is_training=False.
      scale: An array up to n-D. The shape of this tensor must be broadcastable
        to the shape of `inputs`. This is the scale applied to the normalized
        inputs. This cannot be passed in if the module was constructed with
        `create_scale=True`.
      offset: An array up to n-D. The shape of this tensor must be broadcastable
        to the shape of `inputs`. This is the offset applied to the normalized
        inputs. This cannot be passed in if the module was constructed with
        `create_offset=True`.

    Returns:
      The array, normalized across all but the last dimension.
    """
    rank = inputs.ndim
    channel_index = self._channel_index
    if self._channel_index < 0:
      channel_index += rank

    if self._axis:
      axis = self._axis
    else:
      axis = [i for i in range(rank) if i != channel_index]
    if is_training or test_local_stats:
      if self._cross_replica_axis:
        # Calculate global statistics - n is the number of replicas which could
        # differ from jax.device_count() in cases of nested pmaps.
        n = jax.lax.psum(1, self._cross_replica_axis)

        mean = jnp.mean(inputs, axis, keepdims=True)
        mean = jax.lax.psum(mean, axis_name=self._cross_replica_axis) / n
        mean_of_squares = jnp.mean(inputs**2, axis, keepdims=True)
        mean_of_squares = jax.lax.psum(
            mean_of_squares, axis_name=self._cross_replica_axis) / n
        var = mean_of_squares - mean ** 2
      else:
        mean = jnp.mean(inputs, axis, keepdims=True)
        # This uses E[(X - E[X])^2].
        # TODO(tycai): Consider the faster, but possibly less stable
        # E[X^2] - E[X]^2 method.
        var = jnp.var(inputs, axis, keepdims=True)
    else:
      mean = self._mean_ema.average
      var = self._var_ema.average

    # Update moving averages.
    if is_training:
      self._mean_ema(mean)
      self._var_ema(var)

    params_shape = tuple(
        1 if i in axis else inputs.shape[i] for i in range(rank))
    if self._create_scale:
      if scale is not None:
        raise ValueError(
            "Cannot pass `scale` at call time if `create_scale=True`.")
      scale = base.get_parameter("scale", params_shape, inputs.dtype,
                                 self._scale_init)
    elif scale is None:
      scale = 1.
    if self._create_offset:
      if offset is not None:
        raise ValueError(
            "Cannot pass `offset` at call time if `create_offset=True`.")
      offset = base.get_parameter("offset", params_shape, inputs.dtype,
                                  self._offset_init)
    elif offset is None:
      offset = 0.

    # TODO(tycai): TF found the formulation in the comment below to be ~2x
    # faster than this naive implementation (w/o XLA). Benchmark & consider it.
    # inv = scale * lax.rsqrt(var + self._eps)
    # return inputs * inv + (offset - (mean * inv))
    return scale * (inputs - mean) / jnp.sqrt(var + self._eps) + offset
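
The rewrite mentioned in the TODO above is algebraically identical to the final line; a standalone sketch of both forms (plain arrays, no Haiku state):

import jax.numpy as jnp
from jax import lax

def bn_naive(x, mean, var, scale, offset, eps=1e-5):
  return scale * (x - mean) / jnp.sqrt(var + eps) + offset

def bn_fused(x, mean, var, scale, offset, eps=1e-5):
  # One rsqrt plus a multiply-add; same result up to floating-point rounding.
  inv = scale * lax.rsqrt(var + eps)
  return x * inv + (offset - mean * inv)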
Example No. 16
def main(unused_argv):
    using_SGD = FLAGS.using_SGD
    train_size = FLAGS.train_size
    x_train, y_train, x_test, y_test = pickle.load(
        open("data_" + str(train_size) + ".p", "rb"))
    print("Got data")
    sys.stdout.flush()

    # Build the network
    init_fn, apply_fn, _ = stax.serial(
        stax.Dense(2048, 1., 0.05),
        # stax.Erf(),
        stax.Relu(),
        stax.Dense(1, 1., 0.05))

    # Only MSE loss and {0, 1} labels are implemented for now.

    # initialize the network first time, to compute NTK
    randnnn = numpy.random.randint(np.iinfo(np.int32).min,
                                   high=np.iinfo(np.int32).max,
                                   size=2)[0]
    key = random.PRNGKey(randnnn)
    _, params = init_fn(key, (-1, 784))

    # Create an MSE predictor to solve the NTK equation in function space.
    # we assume that the NTK is approximately the same for any sample of parameters (true in the limit of infinite width)
    sys.stdout.flush()
    g_dd = pickle.load(open("ntk_train_" + str(FLAGS.train_size) + ".p", "rb"))
    g_td = pickle.load(
        open("ntk_train_test_" + str(FLAGS.train_size) + ".p", "rb"))
    print("Got NTK")
    if not using_SGD:
        predictor = nt.predict.gradient_descent_mse(g_dd, y_train, g_td)

    batch_size = FLAGS.batch_size

    from mpi4py import MPI
    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()
    size = comm.Get_size()
    print(rank)
    for i in range(FLAGS.num_samples):
        if i % (ceil(FLAGS.num_samples / 100)) == 0:
            print(i)
            sys.stdout.flush()
        #reinitialize the network
        randnnn = numpy.random.randint(np.iinfo(np.int32).min,
                                       high=np.iinfo(np.int32).max,
                                       size=2)[0]
        key = random.PRNGKey(randnnn)
        _, params = init_fn(key, (-1, 784))

        # Get initial values of the network in function space.
        fx_train = apply_fn(params, x_train)
        fx_test = apply_fn(params, x_test)

        if using_SGD:
            error = 1
            lr = 0.1
            lr = nt.predict.max_learning_rate(g_dd)
            print(lr)
            # lr *= 0.05
            # lr*=1
            ntk_train = g_dd.squeeze()
            ntk_train_test = g_td.squeeze()
            if batch_size == train_size:
                indices = np.array(list(range(train_size)))
            while error >= 0.5:
                if batch_size != train_size:
                    indices = numpy.random.choice(range(train_size),
                                                  size=batch_size,
                                                  replace=False)
                fx_test = fx_test - lr * np.matmul(
                    ntk_train_test[:, indices],
                    (fx_train[indices] - y_train[indices])) / (2 * batch_size)
                fx_train = fx_train - lr * np.matmul(
                    ntk_train[:, indices],
                    (fx_train[indices] - y_train[indices])) / (2 * batch_size)
                # fx_train = jax.ops.index_add(fx_train, indices, -lr*np.matmul(ntk_train[:,indices],(fx_train[indices]-y_train[indices]))/(2*batch_size))
                # print(fx_train[0:10])
                error = np.dot(
                    (fx_train - y_train).squeeze(),
                    (fx_train - y_train).squeeze()) / (2 * train_size)
                #print(error)
        else:
            # Get predictions from analytic computation.
            fx_train, fx_test = predictor(FLAGS.train_time, fx_train, fx_test)

        OUTPUT = fx_test > 0.5
        OUTPUT = OUTPUT.astype(int)
        #print(np.transpose(OUTPUT))
        fun = ''.join([str(int(i)) for i in OUTPUT])
        TRUE_OUTPUT = y_test > 0.5
        TRUE_OUTPUT = TRUE_OUTPUT.astype(int)
        #print(np.transpose(OUTPUT))
        ''.join([str(int(i)) for i in TRUE_OUTPUT])
        print("Generalization accuracy",
              np.sum(OUTPUT == TRUE_OUTPUT) / FLAGS.test_size)

        loss = lambda fx, y_hat: 0.5 * np.mean((fx - y_hat)**2)
        #util.print_summary('train', y_train, apply_fn(params, x_train), fx_train, loss)
        #util.print_summary('test', y_test, apply_fn(params, x_test), fx_test, loss)

        OUTPUT = fx_train > 0.5
        OUTPUT = OUTPUT.astype(int)
        #print(np.transpose(OUTPUT))
        ''.join([str(int(i)) for i in OUTPUT])
        TRUE_OUTPUT = y_train > 0.5
        TRUE_OUTPUT = TRUE_OUTPUT.astype(int)
        #print(np.transpose(OUTPUT))
        ''.join([str(int(i)) for i in TRUE_OUTPUT])
        print("Training accuracy",
              np.sum(OUTPUT == TRUE_OUTPUT) / FLAGS.train_size)
        assert np.all(OUTPUT == TRUE_OUTPUT)

        # Append this sample's test-set bit string; a context manager avoids
        # leaking one open file handle per iteration.
        with open('results/data_{}_large.txt'.format(rank), 'a') as results_file:
            results_file.write(fun + '\n')
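
The inner while loop above is (minibatch) gradient descent in function space under the empirical NTK with MSE loss. A condensed sketch of one full-batch step (assumed shapes: ntk_train (n, n), ntk_train_test (n_test, n)):

def ntk_gd_step(fx_train, fx_test, ntk_train, ntk_train_test, y_train, lr, n):
  # One function-space gradient-descent step under the empirical NTK:
  # f <- f - lr * Theta @ (f_train - y) / (2 * n), as in the loop above.
  residual = (fx_train - y_train) / (2 * n)
  fx_train = fx_train - lr * ntk_train @ residual
  fx_test = fx_test - lr * ntk_train_test @ residual
  return fx_train, fx_test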
Example No. 17
def test_binomial_mean(n, p):
    samples = binomial(random.PRNGKey(1), p, n, shape=(100, 100))
    expected_mean = n * p
    assert_allclose(np.mean(samples), expected_mean, rtol=0.05)
Example No. 18
    rng, rng_predict = random.split(random.PRNGKey(0))
    samples = run_inference(model, args, rng, X, Y, D_H, sigma)
    samples_collected.append((sigma, samples))

    # predict Y_test at inputs X_test
    vmap_args = (samples, random.split(rng_predict, args["num_samples"]))
    predictions = vmap(lambda samples, rng: predict(
        model, rng, samples, X_test, D_H, sigma))(*vmap_args)
    predictions = predictions[..., 0]
    train_predictions = vmap(lambda samples, rng: predict(
        model, rng, samples, X, D_H, sigma))(*vmap_args)
    train_predictions = train_predictions[..., 0]

    # compute mean prediction and 95% confidence interval around median
    mean_prediction = np.mean(predictions, axis=0)
    percentiles = onp.percentile(predictions, [2.5, 97.5], axis=0)

    # compute mean prediction and confidence interval around median
    train_mean_prediction = np.mean(train_predictions, axis=0)

    # plot training data
    ax[i].plot(X, Y, 'kx', c="red", alpha=0.3, label="Data samples")
    # plot 95% confidence interval of predictions
    ax[i].fill_between(X_test[:, 0],
                       percentiles[0, :],
                       percentiles[1, :],
                       color='lightblue',
                       label="95% CI",
                       step='mid')
    # plot mean prediction
Example No. 19
def munchausen_target_quantile_values(network, target_params, states, actions,
                                      next_states, rewards, terminals,
                                      num_tau_prime_samples,
                                      num_quantile_samples, cumulative_gamma,
                                      rng, tau, alpha, clip_value_min):
    """Build the munchausen target for return values at given quantiles."""
    rng, rng1, rng2, rng3 = jax.random.split(rng, num=4)
    target_action = network.apply(target_params,
                                  states,
                                  num_quantiles=num_quantile_samples,
                                  rng=rng1)
    curr_state_representation = target_action.representation
    curr_state_representation = jnp.squeeze(curr_state_representation)
    is_terminal_multiplier = 1. - terminals.astype(jnp.float32)
    # Incorporate terminal state to discount factor.
    gamma_with_terminal = cumulative_gamma * is_terminal_multiplier
    gamma_with_terminal = jnp.tile(gamma_with_terminal,
                                   [num_tau_prime_samples])

    replay_net_target_outputs = network.apply(
        target_params,
        next_states,
        num_quantiles=num_tau_prime_samples,
        rng=rng2)
    replay_quantile_values = replay_net_target_outputs.quantile_values

    target_next_action = network.apply(target_params,
                                       next_states,
                                       num_quantiles=num_quantile_samples,
                                       rng=rng3)
    target_next_quantile_values_action = target_next_action.quantile_values
    replay_next_target_q_values = jnp.squeeze(
        jnp.mean(target_next_quantile_values_action, axis=0))

    q_state_values = target_action.quantile_values
    replay_target_q_values = jnp.squeeze(jnp.mean(q_state_values, axis=0))

    num_actions = q_state_values.shape[-1]
    replay_action_one_hot = jax.nn.one_hot(actions, num_actions)
    replay_next_log_policy = stable_scaled_log_softmax(
        replay_next_target_q_values, tau, axis=0)
    replay_next_policy = stable_softmax(replay_next_target_q_values,
                                        tau,
                                        axis=0)
    replay_log_policy = stable_scaled_log_softmax(replay_target_q_values,
                                                  tau,
                                                  axis=0)

    tau_log_pi_a = jnp.sum(replay_log_policy * replay_action_one_hot, axis=0)
    tau_log_pi_a = jnp.clip(tau_log_pi_a, a_min=clip_value_min, a_max=1)
    munchausen_term = alpha * tau_log_pi_a
    weighted_logits = (replay_next_policy *
                       (replay_quantile_values - replay_next_log_policy))

    target_quantile_vals = jnp.sum(weighted_logits, axis=1)
    rewards += munchausen_term
    rewards = jnp.tile(rewards, [num_tau_prime_samples])
    target_quantile_vals = (rewards +
                            gamma_with_terminal * target_quantile_vals)
    next_state_representation = target_next_action.representation
    next_state_representation = jnp.squeeze(next_state_representation)

    return (rng, jax.lax.stop_gradient(target_quantile_vals[:, None]),
            jax.lax.stop_gradient(curr_state_representation),
            jax.lax.stop_gradient(next_state_representation))
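
Stripped of the quantile machinery, the target assembled above has the standard Munchausen form: reward, plus a scaled-and-clipped log-policy bonus for the taken action, plus the discounted soft value of the next state. A scalar sketch under illustrative names (not the code's API):

import jax
import jax.numpy as jnp

def munchausen_scalar_target(reward, terminal, q_curr, q_next, action,
                             tau, alpha, clip_value_min, gamma):
  # Soft value of the next state: sum_a' pi(a'|s') * (q(s', a') - tau * log pi(a'|s')).
  log_pi_next = jax.nn.log_softmax(q_next / tau)
  soft_value = jnp.sum(jax.nn.softmax(q_next / tau) * (q_next - tau * log_pi_next))
  # Munchausen bonus, clipped to [clip_value_min, 0]; tau * log pi is <= 0,
  # so this matches the a_max=1 clip used above.
  tau_log_pi_a = tau * jax.nn.log_softmax(q_curr / tau)[action]
  bonus = alpha * jnp.clip(tau_log_pi_a, clip_value_min, 0.0)
  return reward + bonus + gamma * (1.0 - terminal) * soft_value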
Example No. 20
 def loss_fn(params, batch):
     """The loss function."""
     x, y = batch
     y_hat = model_apply(params, x)
     return jnp.mean(jnp.square(y_hat - y))
Example No. 21
def _update_numerical_cluster(numerical_points, assignment, k):
    return jnp.mean(jnp.where(
        assignment.reshape(-1, 1, 1) == jnp.arange(k).reshape(1, k, 1),
        numerical_points[:, jnp.newaxis, :], 0),
                    axis=0)
Example No. 22
 def loss(params, R):
     return np.mean((vmap(energy_fn,
                          (None, 0))(params, R) - E_gt(R, dr0))**2)
Example No. 23
def accuracy(params, batch):
    inputs, targets = batch
    target_class = jnp.argmax(targets, axis=1)
    predicted_class = jnp.argmax(predict(params, inputs), axis=1)
    return jnp.mean(predicted_class == target_class)
Example No. 24
def crossentropy_loss(logpred, target):
    """Calculate crossentropy loss."""
    return -np.mean(
        np.sum(logpred * slax.one_hot(target, logpred.shape[-1]), axis=-1))
Example No. 25
for ind in range(1):  #Jee.shape[0]

    # Jee Jei Jie Jii gE gI NMDAratio plocal sigR (or sigEE sigIE)
    params_init = np.array([
        Jee[ind] / psi, Jei[ind] / psi, Jie[ind] / psi, Jii[ind] / psi, 1,
        I2E[ind], 0.1, Plocal[ind], Plocal[ind], sigEE[ind], sigIE[ind]
    ])
    OLDSTYLE = False
    params_init = find_params_to_sigmoid(params_init,
                                         MULTI=True,
                                         OLDSTYLE=OLDSTYLE)

    spect, fs, f0, r_fp, CONVG = ssn_multi_probes.ssn_FP(params_init,
                                                         OLDSTYLE=OLDSTYLE)

    spect = np.real(spect) / np.mean(np.real(spect))

    if CONVG:
        #         r_targ = r_fp[trgt,rad_inds]/np.mean(r_fp[trgt, rad_inds], axis =1)

        #         softmax_r = T * logsumexp( r_targ / T )
        #         suppression_index = 1 - (r_targ[-1]/softmax_r)
        #find suppression index for both E/I cells
        r_targ = r_fp[trgt, :]
        r_targ = r_targ[:, rad_inds] / np.mean(r_targ[:, rad_inds],
                                               axis=1)[:, None]
        softmax_r = T * logsumexp(r_targ / T)
        suppression_index = 1 - (r_targ[:, -1] / softmax_r)

        if suppression_index[0] > SI_max:
            SI_max = suppression_index[0]
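
The `softmax_r` above is a temperature-scaled log-sum-exp, i.e. a smooth maximum of the rate profile. A tiny standalone check (assuming `logsumexp` is `scipy.special.logsumexp` and `np` is NumPy, as the snippet appears to use; values are illustrative):

import numpy as np
from scipy.special import logsumexp

r = np.array([0.2, 0.9, 1.4, 1.1])
T = 0.01
smooth_max = T * logsumexp(r / T)            # close to r.max() for small T
suppression_index = 1 - r[-1] / smooth_max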
Example No. 26
def categorical_cross_entropy_loss(logits, labels):
  # Note: log is applied to `logits` directly, so they are assumed to already
  # be normalized probabilities (e.g. softmax outputs).
  onehot_labels = common_utils.onehot(labels, logits.shape[-1])
  return jnp.mean(-jnp.sum(onehot_labels * jnp.log(logits), axis=1))
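
If the inputs were raw (unnormalized) logits rather than probabilities, the numerically stable form would go through `log_softmax`; a sketch assuming `common_utils` is `flax.training.common_utils`:

import jax
import jax.numpy as jnp
from flax.training import common_utils

def categorical_cross_entropy_from_logits(logits, labels):
  onehot_labels = common_utils.onehot(labels, logits.shape[-1])
  return jnp.mean(-jnp.sum(onehot_labels * jax.nn.log_softmax(logits), axis=1))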
Example No. 27
 def apply_fun(params, inputs, **kwargs):
   del kwargs
   (a_2, b_2) = params
   mean = np.mean(inputs, axis=-1, keepdims=True)
   std = np.std(inputs, axis=-1, keepdims=True)
   return a_2 * (inputs - mean) / (std + epsilon) + b_2
Example No. 28
 def _per_batch(inputs, labels):
   logits = predict(params, inputs)
   predicted_classes = top_k_classes(logits, 1)
   predicted_classes = predicted_classes.reshape((predicted_classes.shape[0],))
   return jnp.mean(predicted_classes == labels)
Example No. 29
def loss(params, batch):
  inputs, targets = batch
  preds = predict(params, inputs)
  return -np.mean(preds * targets)
Example No. 30
def accuracy(params, X, y):
    target_class = jnp.argmax(y, axis=1)
    predicted_class = jnp.argmax(batch_predict(params, X), axis=1)
    return jnp.mean(predicted_class == target_class)