Example #1
    def dual(self, var_set: ParamSet, objectives: ParamSet) -> Tuple[Tensor, Tensor]:
        dual_vars, acts = self._compute_dualvars_nonconvexgrad(
            var_set, objectives)
        nb_targets = objectives[self.index].shape[0]

        # Compute the primals. These are not based on the activations that
        # minimize the Lagrangian, because those are not necessarily primal
        # feasible.
        primals = self._objective_fn(acts, objectives)

        lagrangian_terms = self.collect_lagrangian_varterms(
            objectives, dual_vars)
        # For each item in the network, we have a list of all the terms it is
        # involved in. Let's use this to minimize the lagrangian.
        opt_acts = {}
        for index, lag_terms in lagrangian_terms.items():
            intermediate_bound = self.previous_bounds[index]
            broad_lb = jnp.repeat(jnp.expand_dims(intermediate_bound.lower,
                                                  axis=0),
                                  nb_targets,
                                  axis=0)
            broad_ub = jnp.repeat(jnp.expand_dims(intermediate_bound.upper,
                                                  axis=0),
                                  nb_targets,
                                  axis=0)
            opt_acts[index] = _optimize_lagrangian_terms(
                lag_terms, broad_lb, broad_ub)

        minimized_lagrangian = self._objective_fn(opt_acts, objectives)
        for index, lag_terms in lagrangian_terms.items():
            for term in lag_terms:
                out_term = term[1](opt_acts[index])
                minimized_lagrangian = minimized_lagrangian + _sum_over_acts(
                    out_term)

        return primals, minimized_lagrangian
Example #2
def random_rotations(e1, e2, n, rng_key):
    gamma1 = jnp.repeat(jnp.expand_dims(e1, 0), n, axis=0)
    gamma2 = jnp.repeat(jnp.expand_dims(e2, 0), n, axis=0)
    theta = jnp.pi * jax.random.normal(rng_key, gamma1.shape)
    new_gamma1 = jnp.cos(theta) * gamma1 - jnp.sin(theta) * gamma2
    new_gamma2 = jnp.sin(theta) * gamma1 + jnp.cos(theta) * gamma2
    return new_gamma1, new_gamma2
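
A minimal usage sketch (assuming `jax` and `jnp` are imported as in the snippet; the concrete vectors below are illustrative only):

import jax
import jax.numpy as jnp

e1 = jnp.array([1.0, 0.0])
e2 = jnp.array([0.0, 1.0])
# theta is drawn with the same shape as gamma1, so each component of each
# of the n copies gets its own rotation angle.
g1, g2 = random_rotations(e1, e2, n=4, rng_key=jax.random.PRNGKey(0))
print(g1.shape, g2.shape)  # (4, 2) (4, 2)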
Example #3
    def res_ARAP(
            # image size
            W,
            H,
            # unknown vector fields
            Offsets,
            Angle,
            # input (known) vector fields
            UrShape,
            Constraints,
            # masking relations
            C_valid,
            Mask):
        Offsets_left = np.roll(Offsets, shift=-1, axis=0)
        Offsets_right = np.roll(Offsets, shift=1, axis=0)
        Offsets_up = np.roll(Offsets, shift=-1, axis=1)
        Offsets_down = np.roll(Offsets, shift=1, axis=1)

        UrShape_left = np.roll(UrShape, shift=-1, axis=0)
        UrShape_right = np.roll(UrShape, shift=1, axis=0)
        UrShape_up = np.roll(UrShape, shift=-1, axis=1)
        UrShape_down = np.roll(UrShape, shift=1, axis=1)

        Mask_left = np.roll(Mask, shift=-1, axis=0)
        Mask_right = np.roll(Mask, shift=1, axis=0)
        Mask_up = np.roll(Mask, shift=-1, axis=1)
        Mask_down = np.roll(Mask, shift=1, axis=1)

        ML = np.repeat(np.reshape(np.logical_and(Mask, Mask_left), [W, H, 1]),
                       repeats=2,
                       axis=2)
        MR = np.repeat(np.reshape(np.logical_and(Mask, Mask_right), [W, H, 1]),
                       repeats=2,
                       axis=2)
        MU = np.repeat(np.reshape(np.logical_and(Mask, Mask_up), [W, H, 1]),
                       repeats=2,
                       axis=2)
        MD = np.repeat(np.reshape(np.logical_and(Mask, Mask_down), [W, H, 1]),
                       repeats=2,
                       axis=2)

        #print(np.logical_and(Mask, Mask_left).dtype)

        Ereg_left = (
            ML * regular(Offsets, Offsets_left, UrShape, UrShape_left, Angle))
        Ereg_right = (
            MR *
            regular(Offsets, Offsets_right, UrShape, UrShape_right, Angle))
        Ereg_up = (MU *
                   regular(Offsets, Offsets_up, UrShape, UrShape_up, Angle))
        Ereg_down = (
            MD * regular(Offsets, Offsets_down, UrShape, UrShape_down, Angle))

        MC = np.repeat(np.reshape(np.logical_and(Mask, C_valid), [W, H, 1]),
                       repeats=2,
                       axis=2)

        Efit = (MC * 0.5 * (Offsets - Constraints))

        return (Efit, Ereg_left, Ereg_right, Ereg_up, Ereg_down)  # add axis?
Example #4
    def __call__(self, image):
        x = self.backbone(image)

        x = self.feature_conv(x)
        B, H, W, EMB = x.shape  # batch, height, width, embedding size

        col_embeds = jnp.repeat(self.col_embed[:W][jnp.newaxis, :, :], H,
                                0)  #  H, W, embedding_size//2
        row_embeds = jnp.repeat(self.col_embed[:H][:, jnp.newaxis, :], W,
                                1)  # H, W, embedding_size//2

        positional_embeds = jnp.concatenate([col_embeds, row_embeds],
                                            -1)  # H, W, embedding_size

        positional_embeds_as_seq = jnp.reshape(
            positional_embeds, (1, H * W, EMB))  # (1, H*W, embedding_size)

        image_tiles_as_seq = jnp.reshape(x, (B, H * W, -1))

        queries = jnp.repeat(self.query_pos[jnp.newaxis, :, :], B, 0)

        x = self.transformer(
            positional_embeds_as_seq + 0.1 * image_tiles_as_seq, queries)

        pred_logits = self.linear_class(x)
        pred_bbox = nn.sigmoid(
            self.linear_bbox(x))  # TODO maybe chuck an MLP on here

        return {'logits': pred_logits, 'pred_boxes': pred_bbox}
Example #5
  def test_count_permutations_layer_mask_known_perm_zeros(self):
    """Tests count of weight permutations in a mask with zeroed neurons."""
    param_shape = self._masked_model.params['MaskedModule_0']['unmasked'][
        'kernel'].shape

    # Create two unique random mask rows.
    row_type_one = jax.random.bernoulli(
        self._rng, p=0.3, shape=(param_shape[0],)).astype(jnp.int32)
    row_type_two = jnp.zeros(shape=(param_shape[0],), dtype=jnp.int32)

    # Create mask by repeating the two unique rows.
    repeat_one = param_shape[-1] // 3
    repeat_two = param_shape[-1] - repeat_one
    mask_layer = {'kernel': jnp.concatenate(
        (jnp.repeat(row_type_one[:, jnp.newaxis], repeat_one, axis=-1),
         jnp.repeat(row_type_two[:, jnp.newaxis], repeat_two, axis=-1)),
        axis=-1)}

    stats = symmetry.count_permutations_mask_layer(mask_layer)

    with self.subTest(name='count_permutations_mask_unique'):
      self.assertEqual(stats['unique_neurons'], 1)

    with self.subTest(name='count_permutations_permutations'):
      self.assertEqual(stats['permutations'], math.factorial(repeat_one))

    with self.subTest(name='count_permutations_zeroed'):
      self.assertEqual(stats['zeroed_neurons'], repeat_two)

    with self.subTest(name='count_permutations_total'):
      self.assertEqual(stats['total_neurons'], param_shape[-1])
Example #6
    def __call__(self, timesteps: int, batch_size: int) -> jnp.ndarray:
        """Computes the sinusoidal position embedding.

    Args:
      timesteps: The length of the sequence.
      batch_size: The size of the batch.

    Returns:
      Sinusoidal position embedding.
    """
        full_length = timesteps + self._cache_steps

        if self._reverse_order:
            positions = jnp.arange(full_length - 1, -1, -1)
            positions = jnp.repeat(positions[None, :], batch_size, axis=0)
        else:
            if self._cache_steps > 0:
                positions = (get_pos_start(timesteps, batch_size)[:, None] +
                             jnp.arange(timesteps)[None, :])
            else:
                positions = jnp.arange(0, full_length)
                positions = jnp.repeat(positions[None, :], batch_size, axis=0)

        if self._clamp_len is not None:
            positions = jnp.minimum(positions, self._clamp_len)

        scaled_time = positions[:, :, None] * self._inv_freq[None, None, :]
        return jnp.concatenate(
            [jnp.sin(scaled_time), jnp.cos(scaled_time)], axis=2)
Example #7
    def _make_gabor(params: jnp.ndarray,
                    rf_dim: Tuple[int, int]) -> jnp.DeviceArray:
        σ, θ, λ, γ, φ = [
            u[:, jnp.newaxis, jnp.newaxis]
            for u in (params[:, 0], params[:, 1], params[:, 2], params[:, 3],
                      params[:, 4])
        ]
        pos_x, pos_y = [
            u[:, jnp.newaxis, jnp.newaxis]
            for u in (params[:, 5], params[:, 6])
        ]

        n = params.shape[0]

        x, y = jnp.meshgrid(jnp.arange(-rf_dim[0], rf_dim[0]),
                            jnp.arange(-rf_dim[1], rf_dim[1]))
        x = jnp.repeat(x[jnp.newaxis, :, :], n, axis=0)
        y = jnp.repeat(y[jnp.newaxis, :, :], n, axis=0)

        xp = (pos_x - x) * cos(θ) - (pos_y - y) * sin(θ)
        yp = (pos_x - x) * sin(θ) + (pos_y - y) * cos(θ)

        output = exp(-(xp**2 + (γ * yp)**2) /
                     (2 * σ**2)) * exp(1j * (2 * π * xp / λ + φ))

        return zscore_img(output.real)
Example #8
def simulate_data(rng_key, num_categories, num_words, num_supervised_data,
                  num_unsupervised_data):
    rng_key, rng_key_transition, rng_key_emission = random.split(rng_key, 3)

    transition_prior = np.ones(num_categories)
    emission_prior = np.repeat(0.1, num_words)

    transition_prob = dist.Dirichlet(transition_prior).sample(
        key=rng_key_transition, sample_shape=(num_categories, ))
    emission_prob = dist.Dirichlet(emission_prior).sample(
        key=rng_key_emission, sample_shape=(num_categories, ))

    start_prob = np.repeat(1. / num_categories, num_categories)
    categories, words = [], []
    for t in range(num_supervised_data + num_unsupervised_data):
        rng_key, rng_key_transition, rng_key_emission = random.split(
            rng_key, 3)
        if t == 0 or t == num_supervised_data:
            category = dist.Categorical(start_prob).sample(
                key=rng_key_transition)
        else:
            category = dist.Categorical(
                transition_prob[category]).sample(key=rng_key_transition)
        word = dist.Categorical(
            emission_prob[category]).sample(key=rng_key_emission)
        categories.append(category)
        words.append(word)

    # split into supervised data and unsupervised data
    categories, words = np.stack(categories), np.stack(words)
    supervised_categories = categories[:num_supervised_data]
    supervised_words = words[:num_supervised_data]
    unsupervised_words = words[num_supervised_data:]
    return (transition_prior, emission_prior, transition_prob, emission_prob,
            supervised_categories, supervised_words, unsupervised_words)
Example #9
    def test_n5_d2(self):
        x = jnp.ones((5, 2))
        npt.assert_array_equal(
            utils.gaussian_potential(x),
            jnp.repeat(
                -multivariate_normal.logpdf(x[0], jnp.zeros(x.shape[-1]), 1.),
                5))

        m = 3.
        npt.assert_array_equal(
            utils.gaussian_potential(x, m),
            jnp.repeat(
                -multivariate_normal.logpdf(x[0], m * jnp.ones(x.shape[-1]),
                                            1.), 5))

        m = jnp.ones(2) * 3.
        npt.assert_array_equal(
            utils.gaussian_potential(x, m),
            jnp.repeat(-multivariate_normal.logpdf(x[0], m, 1.), 5))

        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            sqrt_prec = jnp.array([[5., 0.], [2., 3.]])
            npt.assert_array_equal(
                utils.gaussian_potential(x, sqrt_prec=sqrt_prec),
                jnp.repeat(0.5 * x[0].T @ sqrt_prec @ sqrt_prec.T @ x[0], 5))
            npt.assert_array_equal(
                utils.gaussian_potential(x, m, sqrt_prec=sqrt_prec),
                jnp.repeat(
                    0.5 * (x[0] - m).T @ sqrt_prec @ sqrt_prec.T @ (x[0] - m),
                    5))
Example #10
def normal_cdf_inv(x: np.ndarray, mu: np.ndarray,
                   log_sigma: np.ndarray) -> np.ndarray:
    """Inverse CDF of a Gaussian with given mean and log standard deviation."""
    num = x.shape[-1]
    sigma = np.repeat(np.exp(log_sigma)[:, None], num, axis=-1)
    mu = np.repeat(mu[:, None], num, axis=-1)
    xx = np.clip(2 * x - 1, -0.999999, 0.999999)
    return np.sqrt(2.) * sigma * sp.special.erfinv(xx) + mu
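
A small sanity-check sketch (assuming `np` is NumPy and `sp` is SciPy with `scipy.special` available, as the snippet implies); at x = 0.5 the inverse CDF returns the mean:

import numpy as np

x = np.full((3, 5), 0.5)          # CDF values in (0, 1), shape (batch, num)
mu = np.array([0.0, 1.0, -1.0])   # per-row means
log_sigma = np.zeros(3)           # per-row log standard deviations (sigma = 1)
q = normal_cdf_inv(x, mu, log_sigma)
print(q.shape)                    # (3, 5); every entry in row i equals mu[i]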
Example #11
    def guide(self):
        if self.fit_rho:
            rho_loc = npy.param(
                Sites.RHO + Sites.LOC,
                jnp.tile(self.rho_loc, (self.num_ltla, 1)),
            )
            rho_scale = npy.param(
                Sites.RHO + Sites.SCALE,
                jnp.tile(self.init_scale * self.rho_scale, (self.num_ltla, 1)),
                constraint=dist.constraints.positive,
            )
            npy.sample(Sites.RHO, dist.Normal(rho_loc, rho_scale))

        # mean / sd for parameter s
        beta_loc = npy.param(
            Sites.BETA + Sites.LOC,
            jnp.tile(self.beta_loc, (self.num_ltla_lin, self.num_basis)),
        )
        beta_scale = npy.param(
            Sites.BETA + Sites.SCALE,
            self.init_scale * self.beta_scale *
            jnp.stack(self.num_ltla_lin * [jnp.eye(self.num_basis)]),
            constraint=dist.constraints.lower_cholesky,
        )

        npy.sample(Sites.BETA,
                   dist.MultivariateNormal(beta_loc, scale_tril=beta_scale))

        b0_loc = npy.param(
            Sites.BC0 + Sites.LOC,
            jnp.concatenate([
                jnp.repeat(self.b0_loc, self.num_lin),
            ]),
        )
        b0_scale = npy.param(
            Sites.BC0 + Sites.SCALE,
            jnp.diag(
                jnp.concatenate([
                    jnp.repeat(
                        self.init_scale * self.b0_scale * self.time_scale,
                        self.num_lin,
                    ),
                ])),
            constraint=dist.constraints.lower_cholesky,
        )
        npy.sample(Sites.B0,
                   dist.MultivariateNormal(b0_loc, scale_tril=b0_scale))

        c_loc = npy.param(
            Sites.C + Sites.LOC,
            jnp.tile(self.c_loc, (self.num_ltla_lin, self.num_lin)))

        c_scale = npy.param(
            Sites.C + Sites.SCALE,
            jnp.tile(self.init_scale * self.c_scale,
                     (self.num_ltla_lin, self.num_lin)),
        )
        npy.sample(Sites.C, dist.Normal(c_loc, c_scale))
Example #12
def get_normal(nb_mixtures, batch_shape):
    """Get parameterized Normal with given batch shape."""
    loc = jnp.zeros(nb_mixtures)
    scale = jnp.ones(nb_mixtures)
    for i, s in enumerate(batch_shape):
        loc = jnp.repeat(jnp.expand_dims(loc, i), s, axis=i)
        scale = jnp.repeat(jnp.expand_dims(scale, i), s, axis=i)
    batch_shape = (*batch_shape, nb_mixtures)
    normal = dist.Normal(loc=loc, scale=scale)
    assert normal.batch_shape == batch_shape
    return normal
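
A usage sketch (assuming `dist` is `numpyro.distributions`, as the snippet implies); the mixture dimension ends up as the trailing batch dimension:

import jax
import jax.numpy as jnp
import numpyro.distributions as dist

normal = get_normal(nb_mixtures=3, batch_shape=(4, 2))
print(normal.batch_shape)                          # (4, 2, 3)
print(normal.sample(jax.random.PRNGKey(0)).shape)  # (4, 2, 3)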
Example #13
    def test_setcovs(self):
        self.scenario.covariances = jnp.repeat(
            jnp.array([[7.]])[jnp.newaxis, :, :], 2, axis=0)
        npt.assert_array_equal(
            self.scenario.precisions,
            jnp.repeat(jnp.array([[1 / 7.]])[jnp.newaxis, :, :], 2, axis=0))
        npt.assert_array_almost_equal(
            self.scenario.precision_sqrts,
            jnp.repeat(jnp.array([[0.37796447]])[jnp.newaxis, :, :], 2,
                       axis=0))
Example #14
def simulate_data(
    rng_key: np.ndarray,
    num_categories: int,
    num_words: int,
    num_supervised: int,
    num_unsupervised: int,
) -> Tuple[jnp.ndarray, jnp.ndarray, jnp.ndarray, jnp.ndarray, jnp.ndarray]:

    rng_key, rng_key_transition, rng_key_emission = random.split(rng_key, 3)

    transition_prior = jnp.ones(num_categories)
    emission_prior = jnp.repeat(0.1, num_words)

    transition_prob = dist.Dirichlet(transition_prior).sample(
        rng_key_transition, sample_shape=(num_categories, ))
    emission_prob = dist.Dirichlet(emission_prior).sample(
        rng_key_emission, sample_shape=(num_categories, ))

    start_prob = jnp.repeat(1.0 / num_categories, num_categories)
    category = 0
    categories = []
    words = []

    for t in range(num_supervised + num_unsupervised):
        rng_key, rng_key_transition, rng_key_emission = random.split(
            rng_key, 3)
        if t == 0 or t == num_supervised:
            category = dist.Categorical(start_prob).sample(rng_key_transition)
        else:
            category = dist.Categorical(
                transition_prob[category]).sample(rng_key_transition)

        word = dist.Categorical(
            emission_prob[category]).sample(rng_key_emission)
        categories.append(category)
        words.append(word)

    # Split data into supervised and unsupervised
    categories = jnp.stack(categories)
    words = jnp.stack(words)

    supervised_categories = categories[:num_supervised]
    supervised_words = words[:num_supervised]
    unsupervised_words = words[num_supervised:]

    return (
        transition_prob,
        emission_prob,
        supervised_categories,
        supervised_words,
        unsupervised_words,
    )
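
For orientation, a hypothetical call (assuming `random` is `jax.random` and `dist` is `numpyro.distributions`, as in the snippet); the shapes follow from the sampling code above:

from jax import random

# num_categories=3, num_words=10, 100 supervised + 50 unsupervised steps.
outputs = simulate_data(random.PRNGKey(0), 3, 10, 100, 50)
transition_prob, emission_prob = outputs[0], outputs[1]
supervised_words, unsupervised_words = outputs[3], outputs[4]
print(transition_prob.shape, emission_prob.shape)        # (3, 3) (3, 10)
print(supervised_words.shape, unsupervised_words.shape)  # (100,) (50,)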
Example #15
def _mock_outputs(online_params, target_params, key, target_name):
    """Returns mock network outputs."""
    _, policy_params_fn = hk.transform(_hk_mock_policy_params)
    key_seq = hk.PRNGSequence(key)

    state_size = ACTION_DIM

    # Input state: [TIME_DIM, BATCH_DIM, DIM_STATE]
    s_tm1 = jax.random.normal(next(key_seq), (TIME_DIM, BATCH_DIM, state_size),
                              jnp.float32)
    policy_params = policy_params_fn(online_params, None, s_tm1)
    target_policy_params = policy_params_fn(target_params, None, s_tm1)

    # Shape for actions: [NUM_SAMPLES, TIME_DIM, BATCH_DIM, ACTION_DIM]
    mean, stddev = target_policy_params['mean'], target_policy_params['stddev']
    mean_repeated = jnp.repeat(mean.reshape((1, ) + mean.shape),
                               NUM_SAMPLES,
                               axis=0)
    stddev_repeated = jnp.repeat(stddev.reshape((1, ) + stddev.shape),
                                 NUM_SAMPLES,
                                 axis=0)
    target_actions = _DIAGONAL_GAUSSIAN_DIST.sample(next(key_seq),
                                                    mean_repeated,
                                                    stddev_repeated)
    # If the target is advantages then num samples is 1.
    if target_name == 'advantages':
        target_actions = target_actions[0, ...]

    # Shape for Q: [NUM_SAMPLES, TIME_DIM, BATCH_DIM]
    # Setting Q = -a_t . a_t^T (minus the squared norm of a_t), where a_t = s_t + a.
    # The solution to optimizing this is basically for the policy to output
    # 0 actions thereby minimizing the cost. Since this is a convex
    # optimization problem, the algorithm should get to a good solution quickly.

    # First compute a_t = s_t + a with shape: [NUM_SAMPLES, TIME_DIM, BATCH_DIM,
    # ACTION_DIM] (the action dim equals the state dim here), then compute
    # the quadratic form.
    a_t = target_actions + jnp.expand_dims(s_tm1, 0)
    sample_q_values = -jnp.sum(a_t**2, axis=-1)
    # Set the advantage to the same as the q value.
    # Shape for advantages: [TIME_DIM, BATCH_DIM]
    advantages = sample_q_values[0, :, :]

    return dict(
        pi_params=policy_params,
        target_pi_params=target_policy_params,
        sample_q_values=sample_q_values,
        advantages=advantages,
        target_actions=target_actions,
    )
Example #16
def get_mvn(nb_mixtures, batch_shape):
    """Get parameterized MultivariateNormal with given batch shape."""
    dimensions = 2
    loc = jnp.zeros((nb_mixtures, dimensions))
    cov_matrix = jnp.repeat(
        jnp.expand_dims(jnp.eye(dimensions, dimensions), 0), nb_mixtures, axis=0
    )
    for i, s in enumerate(batch_shape):
        loc = jnp.repeat(jnp.expand_dims(loc, i), s, axis=i)
        cov_matrix = jnp.repeat(jnp.expand_dims(cov_matrix, i), s, axis=i)
    batch_shape = (*batch_shape, nb_mixtures)
    mvn = dist.MultivariateNormal(loc=loc, covariance_matrix=cov_matrix)
    assert mvn.batch_shape == batch_shape
    return mvn
Example #17
def test_batch_apply(rnn_cell):
    """Tests the ability to apply the RNN to a batch of inputs."""
    cell, output_shape, params, inputs, state = rnn_cell
    batch_size = 256

    # Generate batch inputs and states.
    batch_inputs = jnp.repeat(inputs[jnp.newaxis, :], batch_size, axis=0)
    batch_states = jnp.repeat(state[jnp.newaxis, :], batch_size, axis=0)

    # Apply the RNN.
    new_states = cell.batch_apply(params, batch_inputs, batch_states)

    # Test shape.
    assert new_states.shape == (batch_size, cell.num_units)
Example #18
    def startup(self, scenario: Scenario, n: int, initial_state: cdict,
                initial_extra: cdict, **kwargs) -> Tuple[cdict, cdict]:
        initial_state, initial_extra = super().startup(scenario, n,
                                                       initial_state,
                                                       initial_extra, **kwargs)

        if self.parameters.ensemble_batchsize is None:
            self.parameters.ensemble_batchsize = n
            initial_extra.parameters.ensemble_batchsize = n

        if self.parameters.ensemble_batchsize == n:
            self.get_batch_inds = lambda _: jnp.repeat(
                jnp.arange(n)[None], n, axis=0)
        else:
            self.get_batch_inds = lambda rk: random.choice(
                rk, n, shape=(
                    n,
                    self.parameters.ensemble_batchsize,
                ))

        del initial_extra.parameters.stepsize

        random_keys = random.split(initial_extra.random_key, n + 1)
        initial_extra.random_key = random_keys[-1]

        initial_state.potential, initial_state.grad_potential = vmap(
            scenario.potential_and_grad)(initial_state.value, random_keys[:n])

        initial_state, initial_extra = self.adapt(initial_state, initial_extra)

        self.opt_init, self.opt_update, self.get_params = self.optimiser(
            step_size=self.parameters.stepsize,
            **initial_extra.parameters.optim_params)
        initial_extra.opt_state = self.opt_init(initial_state.value)
        return initial_state, initial_extra
Example #19
def draw_uniform(samples, bins, desired_size):
    """
    Draw uniform set of samples


    """
    hist, bin_edges = np.histogram(samples, bins=bins)
    avg_nb = int(desired_size / float(bins))
    numbers = np.repeat(avg_nb, bins)
    for j in range(4):
        numbers[hist <= numbers] = hist[hist <= numbers]
        nb_rest = desired_size - np.sum(numbers[hist <= numbers])  # * bins
        avg_nb = round(nb_rest / np.sum(hist > numbers))
        numbers[hist > numbers] = avg_nb

    result = []
    count = 0
    for i in range(bin_edges.size - 1):
        ind = samples >= bin_edges[i]
        ind &= samples <= bin_edges[i + 1]
        if ind.sum() > 0:
            positions = np.where(ind)[0]
            nb = min([numbers[i], ind.sum()])
            # NumPy's choice is used here; jax.random.choice would additionally
            # require an explicit PRNG key as its first argument.
            result.append(np.random.choice(positions, size=nb, replace=False))

    return np.concatenate(result)
Example #20
    def __call__(self, param_name, param):
        """Shuffles the weight matrix/mask for a given parameter, per-neuron.

    This is to be used with mask_map, and accepts the standard mask_map
    function parameters.

    Args:
      param_name: The parameter's name.
      param: The parameter's weight or mask matrix.

    Returns:
      A shuffled weight/mask matrix, with each neuron shuffled independently.
    """
        del param_name  # Unused.
        neuron_length = functools.reduce(operator.mul, param.shape[:-1])
        neuron_mask = jnp.arange(neuron_length)
        neuron_mask = jnp.where(
            neuron_mask >= self._sparsity * neuron_mask.size,
            jnp.ones_like(neuron_mask), jnp.zeros_like(neuron_mask))
        mask = jnp.repeat(neuron_mask[Ellipsis, jnp.newaxis],
                          param.shape[-1],
                          axis=1)
        self._rng, rng_input = jax.random.split(self._rng)
        mask = jax.random.shuffle(rng_input, mask, axis=0)
        return mask.reshape(param.shape)
Example #21
def sample_pdf(bins, weights, num_samples, rng, det):
    weights = weights + 1e-5
    pdf = weights / jnp.sum(weights, axis=-1, keepdims=True)
    cdf = jnp.cumsum(pdf, -1)
    cdf = jnp.concatenate((jnp.zeros_like(cdf[..., :1]), cdf), -1)

    if det:
        u = jnp.linspace(0.0, 1.0, num_samples)
        u = jnp.repeat(jnp.expand_dims(u, 0), cdf.shape[:-1], axis=0)
    else:
        u = jax.random.uniform(rng, list(cdf.shape[:-1]) + [num_samples])

    inds = vmap(lambda cdf_i, u_i: jnp.searchsorted(cdf_i, u_i, side="right").
                astype(np.int32))(cdf, u)

    below = jnp.maximum(0, inds - 1)
    above = jnp.minimum(cdf.shape[-1] - 1, inds)
    inds_g = jnp.stack((below, above), axis=-1)

    cdf_g = vmap(lambda cdf_i, inds_gi: cdf_i[inds_gi])(cdf, inds_g)
    bins_g = vmap(lambda bins_i, inds_gi: bins_i[inds_gi])(bins, inds_g)

    # don't know why we have to zero out the outliers?
    clean_inds = lambda arr, cutoff: jnp.where(inds_g < cutoff, arr, 0)
    cdf_g = clean_inds(cdf_g, cdf.shape[-1])
    bins_g = clean_inds(bins_g, bins.shape[-1])

    denom = cdf_g[..., 1] - cdf_g[..., 0]
    denom = jnp.where(denom < 1e-5, 1.0, denom)

    t = (u - cdf_g[..., 0]) / denom
    samples = bins_g[..., 0] + t * (bins_g[..., 1] - bins_g[..., 0])

    return samples
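
A deterministic usage sketch (assuming `jax`, `jnp`, `vmap`, and NumPy as `np` are in scope, as in the snippet); with uniform weights the inverse-CDF samples spread evenly over the bin range:

import jax
import jax.numpy as jnp

bins = jnp.linspace(0.0, 1.0, 9)[None, :]  # (1, 9) bin edges for a single ray
weights = jnp.ones((1, 8))                 # (1, 8) unnormalised bin weights
samples = sample_pdf(bins, weights, num_samples=16,
                     rng=jax.random.PRNGKey(0), det=True)
print(samples.shape)                       # (1, 16)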
Example #22
def test_initialize_model_change_point(init_strategy):
    def model(data):
        alpha = 1 / jnp.mean(data)
        lambda1 = numpyro.sample('lambda1', dist.Exponential(alpha))
        lambda2 = numpyro.sample('lambda2', dist.Exponential(alpha))
        tau = numpyro.sample('tau', dist.Uniform(0, 1))
        lambda12 = jnp.where(jnp.arange(len(data)) < tau * len(data), lambda1, lambda2)
        numpyro.sample('obs', dist.Poisson(lambda12), obs=data)

    count_data = jnp.array([
        13,  24,   8,  24,   7,  35,  14,  11,  15,  11,  22,  22,  11,  57,
        11,  19,  29,   6,  19,  12,  22,  12,  18,  72,  32,   9,   7,  13,
        19,  23,  27,  20,   6,  17,  13,  10,  14,   6,  16,  15,   7,   2,
        15,  15,  19,  70,  49,   7,  53,  22,  21,  31,  19,  11,  18,  20,
        12,  35,  17,  23,  17,   4,   2,  31,  30,  13,  27,   0,  39,  37,
        5,  14,  13,  22,
    ])

    rng_keys = random.split(random.PRNGKey(1), 2)
    init_params, _, _, _ = initialize_model(rng_keys, model,
                                            init_strategy=init_strategy,
                                            model_args=(count_data,))
    if isinstance(init_strategy, partial) and init_strategy.func is init_to_value:
        expected = biject_to(constraints.unit_interval).inv(init_strategy.keywords.get('values')['tau'])
        assert_allclose(init_params[0]['tau'], jnp.repeat(expected, 2))
    for i in range(2):
        init_params_i, _, _, _ = initialize_model(rng_keys[i], model,
                                                  init_strategy=init_strategy,
                                                  model_args=(count_data,))
        for name, p in init_params[0].items():
            # XXX: the result is equal if we disable fast-math-mode
            assert_allclose(p[i], init_params_i[0][name], atol=1e-6)
Example #23
    def update(i, state, u):
        w, z, r, betapow = state
        z = jnp.concatenate((r2c(mimo(w, u)[None, :]), z[:-1, :]))
        z0 = jnp.repeat(z, dims, axis=-1)
        z1 = jnp.tile(z, (1, dims))
        rt = jax.vmap(lambda a, b: a[0] * b.conj(), in_axes=-1,
                      out_axes=0)(z0, z1).reshape(r.shape)
        r = beta * r + (1 - beta) * rt  # exponential moving average
        rhat = r / (1 - betapow)  # bias correction due to small beta
        r_sqsum = jnp.sum(jnp.abs(rhat)**2, axis=-1)

        v = mimo(w, u)
        lcma = jnp.sum(jnp.abs(jnp.abs(v)**2 - R2)**2)
        lmu = 2 * (jnp.sum(r_sqsum) - jnp.sum(jnp.diag(r_sqsum)))
        gcma = 4 * (v * (jnp.abs(v)**2 - R2))[..., None,
                                              None] * jnp.conj(u).T[None, ...]
        gmu_tmp_full = (4 * rhat[..., None, None] *
                        z.T[None, ..., None, None] *
                        jnp.conj(u).T[None, None, None, ...]
                        )  # shape: [dims, dims, delta, dims, T]
        # reduce delta axis first
        gmu_tmp_dr = jnp.sum(gmu_tmp_full,
                             axis=2)  # shape: [dims, dims, dims, T]
        # cross correlation = full correlation - self correlation
        gmu = jnp.sum(gmu_tmp_dr, axis=1) - gmu_tmp_dr[jnp.arange(dims),
                                                       jnp.arange(dims), ...]
        l = lcma + lmu
        g = gcma + gmu

        out = (w, l)
        w = w - lr(i) * g
        betapow *= beta
        state = (w, z, r, betapow)
        return state, out
Example #24
def sample_posterior_and_average(params, hps, key, x_txd, class_id,
                                 batch_size=None):
  """Get the denoised lfad inferred values by posterior sample and average.

  Args:
    params: dictionary of lfads parameters
    hps: dict of LFADS hyperparameters
    key: JAX random state
    x_txd: 2d np.array time by dim trial to denoise
    class_id: one-hot encoding of the class of this example
    batch_size: number of samples, if none, use hps batch size

  Returns:
    LFADS dictionary of inferred values, averaged over randomness.
  """
  if batch_size is None:
    batch_size = hps['batch_size']
  keys = random.split(key, batch_size)
  x_bxtxd = np.repeat(np.expand_dims(x_txd, axis=0), batch_size, axis=0)
  class_id_b = class_id * np.ones((batch_size,)).astype(np.int32)
  keep_rate = 1.0
  use_mean = False
  lfads_dict = batch_forward_pass(params, hps, keys, x_bxtxd, class_id_b,
                                  keep_rate, use_mean)
  return utils.average_lfads_batch(lfads_dict)
Example #25
    def _sample_n_and_log_prob(self, key: PRNGKey,
                               n: int) -> Tuple[Array, Array]:
        """See `Distribution._sample_n_and_log_prob`."""
        samples = self._sample_n(key, n)
        log_prob = -jnp.log(self.range)
        log_prob = jnp.repeat(log_prob[None], n, axis=0)
        return samples, log_prob
Example #26
def finite_horizon_lqr(amat, bmat, x_goal, u_goal, qmat, rmat, horizon):
    n = x_goal.shape[0]
    m = u_goal.shape[0]

    F = np.concatenate((amat, bmat), axis=1)
    F = np.repeat(F[None, :, :], horizon, axis=0)
    f = np.zeros((horizon, n))

    C = np.zeros((n + m, n + m))
    C = ops.index_update(C, ops.index[:n, :n], qmat)
    C = ops.index_update(C, ops.index[n:, n:], rmat)
    C = np.repeat(C[None, :, :], horizon, axis=0)
    c = np.zeros((horizon, n + m))

    K, k = backwards_recursion(C, c, F, f)
    return K[0], k[0]
Example #27
def conway_graph(size) -> jraph.GraphsTuple:
    """Returns a graph representing the game field of conway's game of life."""
    # Creates nodes: each node represents a cell in the game.
    n_node = size**2
    nodes = np.zeros((n_node, 1))
    node_indices = jnp.arange(n_node)
    # Creates edges, senders and receivers:
    # the senders represent the connections to the 8 neighboring fields.
    n_edge = 8 * n_node
    edges = jnp.zeros((n_edge, 1))
    senders = jnp.vstack([
        node_indices - size - 1, node_indices - size, node_indices - size + 1,
        node_indices - 1, node_indices + 1, node_indices + size - 1,
        node_indices + size, node_indices + size + 1
    ])
    senders = senders.T.reshape(-1)
    senders = (senders + size**2) % size**2
    receivers = jnp.repeat(node_indices, 8)
    # Adds a glider to the game
    nodes[0, 0] = 1.0
    nodes[1, 0] = 1.0
    nodes[2, 0] = 1.0
    nodes[2 + size, 0] = 1.0
    nodes[1 + 2 * size, 0] = 1.0
    return jraph.GraphsTuple(n_node=jnp.array([n_node]),
                             n_edge=jnp.array([n_edge]),
                             nodes=jnp.asarray(nodes),
                             edges=edges,
                             globals=None,
                             senders=senders,
                             receivers=receivers)
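
A quick usage sketch (assuming `jraph`, `jax.numpy as jnp`, and NumPy as `np` are imported, as in the snippet):

graph = conway_graph(size=8)
print(graph.n_node, graph.n_edge)                  # [64] [512]
print(graph.senders.shape, graph.receivers.shape)  # (512,) (512,)
print(int(graph.nodes.sum()))                      # 5 live cells: the glider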
Example #28
def compute_advantage(params, critic_fn, rewards, inputs):
    """Compute the advantage: difference between rewards and predicted value.

  Args:
    params: parameters for the critic neural net
    critic_fn: function to run critic neural net
    rewards: rewards for the perturbed samples
    inputs: original samples, used as input to the Jax model

  Returns:
    advantage: [batch_size x num_mutations]
  """
    assert inputs.ndim == 4

    num_mutations, batch_size, str_length, vocab_size = inputs.shape

    inputs_reshaped = inputs.reshape(
        (num_mutations * batch_size, str_length, vocab_size))

    predicted_value = critic_fn(inputs_reshaped, params, mode="train")
    assert predicted_value.shape == (num_mutations * batch_size, 1)
    predicted_value = predicted_value.reshape((num_mutations, batch_size))

    assert rewards.shape == (batch_size, )
    rewards = jnp.repeat(rewards[None, :], num_mutations, 0)
    assert rewards.shape == (num_mutations, batch_size)

    advantage = rewards - predicted_value
    advantage = jnp.transpose(advantage)
    assert advantage.shape == (batch_size, num_mutations)
    return advantage
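
A self-contained sketch with a stand-in critic (`dummy_critic` below is purely illustrative and not part of the original code):

import jax.numpy as jnp

def dummy_critic(inputs, params, mode="train"):
    # Stand-in critic: predicts value 0 for every example, shape (N, 1).
    return jnp.zeros((inputs.shape[0], 1))

rewards = jnp.arange(4.0)                # (batch_size,) = (4,)
inputs = jnp.zeros((3, 4, 7, 5))         # (num_mutations, batch, length, vocab)
advantage = compute_advantage(None, dummy_critic, rewards, inputs)
print(advantage.shape)                   # (4, 3) = (batch_size, num_mutations)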
Example #29
def _get_weights(params, k, ref_row, method):
    '''Reshapes the given params (weights) into the full weight matrix, including the reference row of zeros.'''

    if method in ['Full', None]:
        raw_weights = params.reshape(-1, k + 1)
        # weights = jax_np.zeros([k, k+1])
        # weights[:-1, :] = params.reshape(-1, k + 1)

    elif method == 'Diag':
        raw_weights = jax_np.hstack(
            [jax_np.diag(params[:k]), params[k:].reshape(-1, 1)])
        # weights[:, :-1][jax_np.diag_indices(k)] = params[:]

    elif method == 'FixDiag':
        raw_weights = jax_np.hstack(
            [jax_np.eye(k) * params[0],
             jax_np.zeros((k, 1))])
        # weights[jax_np.dgag_indices(k - 1)] = params[0]
        # weights[jax_np.diag_indices(k)] = params[0]
    else:
        raise ValueError("Unknown calibration method {}".format(method))

    if ref_row:
        weights = raw_weights - jax_np.repeat(
            raw_weights[-1, :].reshape(1, -1), k, axis=0)
    else:
        weights = raw_weights

    return weights
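
For illustration (assuming `jax_np` is `jax.numpy`), the 'Full' method reshapes the parameter vector to (k, k+1) and, with `ref_row=True`, subtracts the last row from every row:

import jax.numpy as jax_np

k = 3
params = jax_np.arange(1.0, k * (k + 1) + 1.0)  # k*(k+1) = 12 raw weights
W = _get_weights(params, k, ref_row=True, method='Full')
print(W.shape)  # (3, 4)
print(W[-1])    # all zeros: the reference row minus itself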
Example #30
def grad_eigh(w, v, wg, vg):
    """Gradient for eigenvalues and vectors of a symmetric matrix.

    Parameters
    ----------
    w: eigenvalues

    v: eigenvectors

    wg: adjoint eigenvalues

    vg: adjoint eigenvectors
    """
    vc = v  # real
    N = 3
    # wg, vg = g          # Gradient w.r.t. eigenvalues, eigenvectors.
    w_repeated = np.repeat(w[..., np.newaxis], N, axis=-1)
    # Eigenvalue part
    vjp_temp = np.dot(vc * wg[..., np.newaxis, :], v.T)

    # Add eigenvector part only if non-zero backward signal is present.
    # This can avoid NaN results for degenerate cases if the function depends
    # on the eigenvalues only.
    if np.any(vg):
        off_diag = np.ones((N, N)) - np.eye(N)
        F = off_diag / (w_repeated.T - w_repeated + np.eye(N))
        vjp_temp += np.dot(np.dot(vc, F * np.dot(v.T, vg)), v.T)
    else:
        assert 0

    off_diag_mask = (onp.ones((3, 3)) - onp.eye(3)) / 2

    return vjp_temp * np.eye(
        vjp_temp.shape[-1]) + (vjp_temp + vjp_temp.T) * off_diag_mask