def dual(self, var_set: ParamSet, objectives: ParamSet) -> Tuple[Tensor, Tensor]:
    dual_vars, acts = self._compute_dualvars_nonconvexgrad(var_set, objectives)
    nb_targets = objectives[self.index].shape[0]

    # Compute the primals. These are not based on the activations minimizing the
    # Lagrangian (because those are not necessarily primal feasible).
    primals = self._objective_fn(acts, objectives)

    lagrangian_terms = self.collect_lagrangian_varterms(objectives, dual_vars)
    # For each item in the network, we have a list of all the terms it is
    # involved in. Use this to minimize the Lagrangian.
    opt_acts = {}
    for index, lag_terms in lagrangian_terms.items():
        intermediate_bound = self.previous_bounds[index]
        broad_lb = jnp.repeat(
            jnp.expand_dims(intermediate_bound.lower, axis=0), nb_targets, axis=0)
        broad_ub = jnp.repeat(
            jnp.expand_dims(intermediate_bound.upper, axis=0), nb_targets, axis=0)
        opt_acts[index] = _optimize_lagrangian_terms(lag_terms, broad_lb, broad_ub)

    minimized_lagrangian = self._objective_fn(opt_acts, objectives)
    for index, lag_terms in lagrangian_terms.items():
        for term in lag_terms:
            out_term = term[1](opt_acts[index])
            minimized_lagrangian = minimized_lagrangian + _sum_over_acts(out_term)

    return primals, minimized_lagrangian
def random_rotations(e1, e2, n, rng_key):
    gamma1 = jnp.repeat(jnp.expand_dims(e1, 0), n, axis=0)
    gamma2 = jnp.repeat(jnp.expand_dims(e2, 0), n, axis=0)
    theta = jnp.pi * jax.random.normal(rng_key, gamma1.shape)
    new_gamma1 = jnp.cos(theta) * gamma1 - jnp.sin(theta) * gamma2
    new_gamma2 = jnp.sin(theta) * gamma1 + jnp.cos(theta) * gamma2
    return new_gamma1, new_gamma2
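# Minimal usage sketch (an added example, not part of the original module;
# assumes only that `random_rotations` above is in scope). The function tiles
# e1 and e2 n times and mixes them with independent random angles per entry,
# so the outputs keep the (n, *e1.shape) layout.
import jax
import jax.numpy as jnp

e1 = jnp.array([1.0, 0.0])
e2 = jnp.array([0.0, 1.0])
g1, g2 = random_rotations(e1, e2, n=4, rng_key=jax.random.PRNGKey(0))
assert g1.shape == (4, 2) and g2.shape == (4, 2)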
def res_ARAP(
        # image size
        W, H,
        # unknown vector fields
        Offsets, Angle,
        # input (known) vector fields
        UrShape, Constraints,
        # masking relations
        C_valid, Mask):
    Offsets_left = np.roll(Offsets, shift=-1, axis=0)
    Offsets_right = np.roll(Offsets, shift=1, axis=0)
    Offsets_up = np.roll(Offsets, shift=-1, axis=1)
    Offsets_down = np.roll(Offsets, shift=1, axis=1)

    UrShape_left = np.roll(UrShape, shift=-1, axis=0)
    UrShape_right = np.roll(UrShape, shift=1, axis=0)
    UrShape_up = np.roll(UrShape, shift=-1, axis=1)
    UrShape_down = np.roll(UrShape, shift=1, axis=1)

    Mask_left = np.roll(Mask, shift=-1, axis=0)
    Mask_right = np.roll(Mask, shift=1, axis=0)
    Mask_up = np.roll(Mask, shift=-1, axis=1)
    Mask_down = np.roll(Mask, shift=1, axis=1)

    ML = np.repeat(
        np.reshape(np.logical_and(Mask, Mask_left), [W, H, 1]), repeats=2, axis=2)
    MR = np.repeat(
        np.reshape(np.logical_and(Mask, Mask_right), [W, H, 1]), repeats=2, axis=2)
    MU = np.repeat(
        np.reshape(np.logical_and(Mask, Mask_up), [W, H, 1]), repeats=2, axis=2)
    MD = np.repeat(
        np.reshape(np.logical_and(Mask, Mask_down), [W, H, 1]), repeats=2, axis=2)
    # print(np.logical_and(Mask, Mask_left).dtype)

    Ereg_left = (
        ML * regular(Offsets, Offsets_left, UrShape, UrShape_left, Angle))
    Ereg_right = (
        MR * regular(Offsets, Offsets_right, UrShape, UrShape_right, Angle))
    Ereg_up = (MU * regular(Offsets, Offsets_up, UrShape, UrShape_up, Angle))
    Ereg_down = (
        MD * regular(Offsets, Offsets_down, UrShape, UrShape_down, Angle))

    MC = np.repeat(
        np.reshape(np.logical_and(Mask, C_valid), [W, H, 1]), repeats=2, axis=2)
    Efit = (MC * 0.5 * (Offsets - Constraints))

    return (Efit, Ereg_left, Ereg_right, Ereg_up, Ereg_down)  # add axis?
def __call__(self, image):
    x = self.backbone(image)
    x = self.feature_conv(x)
    B, H, W, EMB = x.shape[0], x.shape[1], x.shape[2], x.shape[3]  # 0 is batch, 3 is feature

    col_embeds = jnp.repeat(
        self.col_embed[:W][jnp.newaxis, :, :], H, 0)  # H, W, embedding_size//2
    row_embeds = jnp.repeat(
        self.col_embed[:H][:, jnp.newaxis, :], W, 1)  # H, W, embedding_size//2
    positional_embeds = jnp.concatenate(
        [col_embeds, row_embeds], -1)  # H, W, embedding_size
    positional_embeds_as_seq = jnp.reshape(
        positional_embeds, (1, H * W, EMB))  # H*W, embedding_size
    image_tiles_as_seq = jnp.reshape(x, (B, H * W, -1))

    queries = jnp.repeat(self.query_pos[jnp.newaxis, :, :], B, 0)
    x = self.transformer(
        positional_embeds_as_seq + 0.1 * image_tiles_as_seq, queries)

    pred_logits = self.linear_class(x)
    pred_bbox = nn.sigmoid(self.linear_bbox(x))  # TODO maybe chuck an MLP on here
    return {'logits': pred_logits, 'pred_boxes': pred_bbox}
def test_count_permutations_layer_mask_known_perm_zeros(self):
    """Tests count of weight permutations in a mask with zeroed neurons."""
    param_shape = self._masked_model.params['MaskedModule_0']['unmasked'][
        'kernel'].shape

    # Create two unique rows: one random, one all-zero.
    row_type_one = jax.random.bernoulli(
        self._rng, p=0.3, shape=(param_shape[0],)).astype(jnp.int32)
    row_type_two = jnp.zeros(shape=(param_shape[0],), dtype=jnp.int32)

    # Create mask by repeating the two unique rows.
    repeat_one = param_shape[-1] // 3
    repeat_two = param_shape[-1] - repeat_one
    mask_layer = {'kernel': jnp.concatenate(
        (jnp.repeat(row_type_one[:, jnp.newaxis], repeat_one, axis=-1),
         jnp.repeat(row_type_two[:, jnp.newaxis], repeat_two, axis=-1)),
        axis=-1)}

    stats = symmetry.count_permutations_mask_layer(mask_layer)

    with self.subTest(name='count_permutations_mask_unique'):
        self.assertEqual(stats['unique_neurons'], 1)

    with self.subTest(name='count_permutations_permutations'):
        self.assertEqual(stats['permutations'], math.factorial(repeat_one))

    with self.subTest(name='count_permutations_zeroed'):
        self.assertEqual(stats['zeroed_neurons'], repeat_two)

    with self.subTest(name='count_permutations_total'):
        self.assertEqual(stats['total_neurons'], param_shape[-1])
def __call__(self, timesteps: int, batch_size: int) -> jnp.ndarray:
    """Computes the sinusoidal position embedding.

    Args:
      timesteps: The length of the sequence.
      batch_size: The size of the batch.

    Returns:
      Sinusoidal position embedding.
    """
    full_length = timesteps + self._cache_steps

    if self._reverse_order:
        positions = jnp.arange(full_length - 1, -1, -1)
        positions = jnp.repeat(positions[None, :], batch_size, axis=0)
    else:
        if self._cache_steps > 0:
            positions = (get_pos_start(timesteps, batch_size)[:, None]
                         + jnp.arange(timesteps)[None, :])
        else:
            positions = jnp.arange(0, full_length)
            positions = jnp.repeat(positions[None, :], batch_size, axis=0)

    if self._clamp_len is not None:
        positions = jnp.minimum(positions, self._clamp_len)

    scaled_time = positions[:, :, None] * self._inv_freq[None, None, :]
    return jnp.concatenate(
        [jnp.sin(scaled_time), jnp.cos(scaled_time)], axis=2)
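# Shape/formula note (added for clarity; the contents of self._inv_freq are set
# elsewhere, typically inv_freq[k] = 1 / 10000**(2k / d_model), which is an
# assumption here). For a position p and frequency index k the embedding
# concatenates sin(p * inv_freq[k]) and cos(p * inv_freq[k]) along the feature
# axis, so the output has shape [batch_size, sequence_length, 2 * len(inv_freq)].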
def _make_gabor(params: jnp.ndarray,
                rf_dim: Tuple[int, int]) -> jnp.DeviceArray:
    σ, θ, λ, γ, φ = [
        u[:, jnp.newaxis, jnp.newaxis]
        for u in (params[:, 0], params[:, 1], params[:, 2], params[:, 3],
                  params[:, 4])
    ]
    pos_x, pos_y = [
        u[:, jnp.newaxis, jnp.newaxis] for u in (params[:, 5], params[:, 6])
    ]
    n = params.shape[0]

    x, y = jnp.meshgrid(jnp.arange(-rf_dim[0], rf_dim[0]),
                        jnp.arange(-rf_dim[1], rf_dim[1]))
    x = jnp.repeat(x[jnp.newaxis, :, :], n, axis=0)
    y = jnp.repeat(y[jnp.newaxis, :, :], n, axis=0)

    xp = (pos_x - x) * cos(θ) - (pos_y - y) * sin(θ)
    yp = (pos_x - x) * sin(θ) + (pos_y - y) * cos(θ)

    output = exp(-(xp**2 + (γ * yp)**2) / (2 * σ**2)) * exp(
        1j * (2 * π * xp / λ + φ))
    return zscore_img(output.real)
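# Reference for the expression above (an added note, not original code): each
# filter is the real part of a standard complex Gabor function on the grid,
#   g(x, y) = exp(-(x'² + γ² y'²) / (2 σ²)) * exp(i (2π x' / λ + φ)),
# where (x', y') are the grid coordinates relative to (pos_x, pos_y) rotated by
# θ, σ is the envelope width, λ the wavelength, γ the aspect ratio and φ the
# phase.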
def simulate_data(rng_key, num_categories, num_words, num_supervised_data,
                  num_unsupervised_data):
    rng_key, rng_key_transition, rng_key_emission = random.split(rng_key, 3)

    transition_prior = np.ones(num_categories)
    emission_prior = np.repeat(0.1, num_words)
    transition_prob = dist.Dirichlet(transition_prior).sample(
        key=rng_key_transition, sample_shape=(num_categories,))
    emission_prob = dist.Dirichlet(emission_prior).sample(
        key=rng_key_emission, sample_shape=(num_categories,))
    start_prob = np.repeat(1. / num_categories, num_categories)

    categories, words = [], []
    for t in range(num_supervised_data + num_unsupervised_data):
        rng_key, rng_key_transition, rng_key_emission = random.split(rng_key, 3)
        if t == 0 or t == num_supervised_data:
            category = dist.Categorical(start_prob).sample(key=rng_key_transition)
        else:
            category = dist.Categorical(
                transition_prob[category]).sample(key=rng_key_transition)
        word = dist.Categorical(emission_prob[category]).sample(key=rng_key_emission)
        categories.append(category)
        words.append(word)

    # Split into supervised data and unsupervised data.
    categories, words = np.stack(categories), np.stack(words)
    supervised_categories = categories[:num_supervised_data]
    supervised_words = words[:num_supervised_data]
    unsupervised_words = words[num_supervised_data:]
    return (transition_prior, emission_prior, transition_prob, emission_prob,
            supervised_categories, supervised_words, unsupervised_words)
def test_n5_d2(self):
    x = jnp.ones((5, 2))

    npt.assert_array_equal(
        utils.gaussian_potential(x),
        jnp.repeat(
            -multivariate_normal.logpdf(x[0], jnp.zeros(x.shape[-1]), 1.), 5))

    m = 3.
    npt.assert_array_equal(
        utils.gaussian_potential(x, m),
        jnp.repeat(
            -multivariate_normal.logpdf(x[0], m * jnp.ones(x.shape[-1]), 1.), 5))

    m = jnp.ones(2) * 3.
    npt.assert_array_equal(
        utils.gaussian_potential(x, m),
        jnp.repeat(-multivariate_normal.logpdf(x[0], m, 1.), 5))

    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        sqrt_prec = jnp.array([[5., 0.], [2., 3.]])
        npt.assert_array_equal(
            utils.gaussian_potential(x, sqrt_prec=sqrt_prec),
            jnp.repeat(0.5 * x[0].T @ sqrt_prec @ sqrt_prec.T @ x[0], 5))
        npt.assert_array_equal(
            utils.gaussian_potential(x, m, sqrt_prec=sqrt_prec),
            jnp.repeat(
                0.5 * (x[0] - m).T @ sqrt_prec @ sqrt_prec.T @ (x[0] - m), 5))
def normal_cdf_inv(x: np.ndarray, mu: np.ndarray,
                   log_sigma: np.ndarray) -> np.ndarray:
    """Inverse CDF of a Gaussian with given mean and log standard deviation."""
    num = x.shape[-1]
    sigma = np.repeat(np.exp(log_sigma)[:, None], num, axis=-1)
    mu = np.repeat(mu[:, None], num, axis=-1)
    xx = np.clip(2 * x - 1, -0.999999, 0.999999)
    return np.sqrt(2.) * sigma * sp.special.erfinv(xx) + mu
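# Quick sanity check (an added sketch; it assumes `np` is numpy and `sp` is
# scipy in this module, which is an assumption on our part). At x = 0.5 the
# clipped argument 2*x - 1 is zero, erfinv(0) = 0, and the inverse CDF returns
# the mean for every column.
import numpy as np
import scipy as sp
import scipy.special  # ensure sp.special is available

mu = np.array([0., 1., 2.])
log_sigma = np.zeros(3)
x = 0.5 * np.ones((3, 4))
out = normal_cdf_inv(x, mu, log_sigma)
assert np.allclose(out, np.repeat(mu[:, None], 4, axis=-1), atol=1e-4)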
def guide(self):
    if self.fit_rho:
        rho_loc = npy.param(
            Sites.RHO + Sites.LOC,
            jnp.tile(self.rho_loc, (self.num_ltla, 1)),
        )
        rho_scale = npy.param(
            Sites.RHO + Sites.SCALE,
            jnp.tile(self.init_scale * self.rho_scale, (self.num_ltla, 1)),
            constraint=dist.constraints.positive,
        )
        npy.sample(Sites.RHO, dist.Normal(rho_loc, rho_scale))

    # mean / sd for parameter s
    beta_loc = npy.param(
        Sites.BETA + Sites.LOC,
        jnp.tile(self.beta_loc, (self.num_ltla_lin, self.num_basis)),
    )
    beta_scale = npy.param(
        Sites.BETA + Sites.SCALE,
        self.init_scale * self.beta_scale
        * jnp.stack(self.num_ltla_lin * [jnp.eye(self.num_basis)]),
        constraint=dist.constraints.lower_cholesky,
    )
    npy.sample(Sites.BETA, dist.MultivariateNormal(beta_loc, scale_tril=beta_scale))

    b0_loc = npy.param(
        Sites.BC0 + Sites.LOC,
        jnp.concatenate([
            jnp.repeat(self.b0_loc, self.num_lin),
        ]),
    )
    b0_scale = npy.param(
        Sites.BC0 + Sites.SCALE,
        jnp.diag(
            jnp.concatenate([
                jnp.repeat(
                    self.init_scale * self.b0_scale * self.time_scale,
                    self.num_lin,
                ),
            ])),
        constraint=dist.constraints.lower_cholesky,
    )
    npy.sample(Sites.B0, dist.MultivariateNormal(b0_loc, scale_tril=b0_scale))

    c_loc = npy.param(
        Sites.C + Sites.LOC,
        jnp.tile(self.c_loc, (self.num_ltla_lin, self.num_lin)))
    c_scale = npy.param(
        Sites.C + Sites.SCALE,
        jnp.tile(self.init_scale * self.c_scale,
                 (self.num_ltla_lin, self.num_lin)),
    )
    npy.sample(Sites.C, dist.Normal(c_loc, c_scale))
def get_normal(nb_mixtures, batch_shape):
    """Get parameterized Normal with given batch shape."""
    loc = jnp.zeros(nb_mixtures)
    scale = jnp.ones(nb_mixtures)
    for i, s in enumerate(batch_shape):
        loc = jnp.repeat(jnp.expand_dims(loc, i), s, axis=i)
        scale = jnp.repeat(jnp.expand_dims(scale, i), s, axis=i)

    batch_shape = (*batch_shape, nb_mixtures)
    normal = dist.Normal(loc=loc, scale=scale)
    assert normal.batch_shape == batch_shape
    return normal
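# Note (an added sketch, not part of the original code): the expand_dims/repeat
# loop above materialises loc and scale with shape (*batch_shape, nb_mixtures);
# the same arrays can be produced directly with jnp.broadcast_to.
import jax.numpy as jnp

nb_mixtures, batch_shape = 3, (4, 5)
loc_loop = jnp.zeros(nb_mixtures)
for i, s in enumerate(batch_shape):
    loc_loop = jnp.repeat(jnp.expand_dims(loc_loop, i), s, axis=i)
loc_broadcast = jnp.broadcast_to(
    jnp.zeros(nb_mixtures), (*batch_shape, nb_mixtures))
assert loc_loop.shape == loc_broadcast.shape == (4, 5, 3)
assert (loc_loop == loc_broadcast).all()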
def test_setcovs(self):
    self.scenario.covariances = jnp.repeat(
        jnp.array([[7.]])[jnp.newaxis, :, :], 2, axis=0)
    npt.assert_array_equal(
        self.scenario.precisions,
        jnp.repeat(jnp.array([[1 / 7.]])[jnp.newaxis, :, :], 2, axis=0))
    npt.assert_array_almost_equal(
        self.scenario.precision_sqrts,
        jnp.repeat(jnp.array([[0.37796447]])[jnp.newaxis, :, :], 2, axis=0))
def simulate_data(
    rng_key: np.ndarray,
    num_categories: int,
    num_words: int,
    num_supervised: int,
    num_unsupervised: int,
) -> Tuple[jnp.ndarray, jnp.ndarray, jnp.ndarray, jnp.ndarray, jnp.ndarray]:
    rng_key, rng_key_transition, rng_key_emission = random.split(rng_key, 3)

    transition_prior = jnp.ones(num_categories)
    emission_prior = jnp.repeat(0.1, num_words)
    transition_prob = dist.Dirichlet(transition_prior).sample(
        rng_key_transition, sample_shape=(num_categories,))
    emission_prob = dist.Dirichlet(emission_prior).sample(
        rng_key_emission, sample_shape=(num_categories,))
    start_prob = jnp.repeat(1.0 / num_categories, num_categories)

    category = 0
    categories = []
    words = []
    for t in range(num_supervised + num_unsupervised):
        rng_key, rng_key_transition, rng_key_emission = random.split(rng_key, 3)
        if t == 0 or t == num_supervised:
            category = dist.Categorical(start_prob).sample(rng_key_transition)
        else:
            category = dist.Categorical(
                transition_prob[category]).sample(rng_key_transition)
        word = dist.Categorical(emission_prob[category]).sample(rng_key_emission)
        categories.append(category)
        words.append(word)

    # Split data into supervised and unsupervised.
    categories = jnp.stack(categories)
    words = jnp.stack(words)
    supervised_categories = categories[:num_supervised]
    supervised_words = words[:num_supervised]
    unsupervised_words = words[num_supervised:]
    return (
        transition_prob,
        emission_prob,
        supervised_categories,
        supervised_words,
        unsupervised_words,
    )
def _mock_outputs(online_params, target_params, key, target_name):
    """Returns mock network outputs."""
    _, policy_params_fn = hk.transform(_hk_mock_policy_params)
    key_seq = hk.PRNGSequence(key)
    state_size = ACTION_DIM

    # Input state: [TIME_DIM, BATCH_DIM, DIM_STATE]
    s_tm1 = jax.random.normal(
        next(key_seq), (TIME_DIM, BATCH_DIM, state_size), jnp.float32)
    policy_params = policy_params_fn(online_params, None, s_tm1)
    target_policy_params = policy_params_fn(target_params, None, s_tm1)

    # Shape for actions: [NUM_SAMPLES, TIME_DIM, BATCH_DIM, ACTION_DIM]
    mean, stddev = target_policy_params['mean'], target_policy_params['stddev']
    mean_repeated = jnp.repeat(
        mean.reshape((1,) + mean.shape), NUM_SAMPLES, axis=0)
    stddev_repeated = jnp.repeat(
        stddev.reshape((1,) + stddev.shape), NUM_SAMPLES, axis=0)
    target_actions = _DIAGONAL_GAUSSIAN_DIST.sample(
        next(key_seq), mean_repeated, stddev_repeated)

    # If the target is advantages then num samples is 1.
    if target_name == 'advantages':
        target_actions = target_actions[0, ...]

    # Shape for Q: [NUM_SAMPLES, TIME_DIM, BATCH_DIM]
    # Setting Q = -a_t * tf.transpose(a_t) where a_t = s_t + a.
    # The solution to optimizing this is basically for the policy to output
    # 0 actions, thereby minimizing the cost. Since this is a convex
    # optimization problem, the algorithm should get to a good solution quickly.
    #
    # First compute a_t = s_t + a with shape
    # [NUM_SAMPLES, TIME_DIM, BATCH_DIM, ACTION_DIM]
    # (action dim is the same as state dim here), then compute the quadratic form.
    a_t = target_actions + jnp.expand_dims(s_tm1, 0)
    sample_q_values = -jnp.sum(a_t**2, axis=-1)

    # Set the advantage to the same as the q value.
    # Shape for advantages: [TIME_DIM, BATCH_DIM]
    advantages = sample_q_values[0, :, :]

    return dict(
        pi_params=policy_params,
        target_pi_params=target_policy_params,
        sample_q_values=sample_q_values,
        advantages=advantages,
        target_actions=target_actions,
    )
def get_mvn(nb_mixtures, batch_shape):
    """Get parameterized MultivariateNormal with given batch shape."""
    dimensions = 2
    loc = jnp.zeros((nb_mixtures, dimensions))
    cov_matrix = jnp.repeat(
        jnp.expand_dims(jnp.eye(dimensions, dimensions), 0), nb_mixtures, axis=0)
    for i, s in enumerate(batch_shape):
        loc = jnp.repeat(jnp.expand_dims(loc, i), s, axis=i)
        cov_matrix = jnp.repeat(jnp.expand_dims(cov_matrix, i), s, axis=i)

    batch_shape = (*batch_shape, nb_mixtures)
    mvn = dist.MultivariateNormal(loc=loc, covariance_matrix=cov_matrix)
    assert mvn.batch_shape == batch_shape
    return mvn
def test_batch_apply(rnn_cell):
    """Tests the ability to apply the RNN to a batch of inputs."""
    cell, output_shape, params, inputs, state = rnn_cell
    batch_size = 256

    # Generate batch inputs and states.
    batch_inputs = jnp.repeat(inputs[jnp.newaxis, :], batch_size, axis=0)
    batch_states = jnp.repeat(state[jnp.newaxis, :], batch_size, axis=0)

    # Apply the RNN.
    new_states = cell.batch_apply(params, batch_inputs, batch_states)

    # Test shape.
    assert new_states.shape == (batch_size, cell.num_units)
def startup(self, scenario: Scenario, n: int, initial_state: cdict,
            initial_extra: cdict, **kwargs) -> Tuple[cdict, cdict]:
    initial_state, initial_extra = super().startup(
        scenario, n, initial_state, initial_extra, **kwargs)

    if self.parameters.ensemble_batchsize is None:
        self.parameters.ensemble_batchsize = n
        initial_extra.parameters.ensemble_batchsize = n

    if self.parameters.ensemble_batchsize == n:
        self.get_batch_inds = lambda _: jnp.repeat(
            jnp.arange(n)[None], n, axis=0)
    else:
        self.get_batch_inds = lambda rk: random.choice(
            rk, n, shape=(n, self.parameters.ensemble_batchsize))

    del initial_extra.parameters.stepsize

    random_keys = random.split(initial_extra.random_key, n + 1)
    initial_extra.random_key = random_keys[-1]
    initial_state.potential, initial_state.grad_potential = vmap(
        scenario.potential_and_grad)(initial_state.value, random_keys[:n])

    initial_state, initial_extra = self.adapt(initial_state, initial_extra)

    self.opt_init, self.opt_update, self.get_params = self.optimiser(
        step_size=self.parameters.stepsize,
        **initial_extra.parameters.optim_params)
    initial_extra.opt_state = self.opt_init(initial_state.value)
    return initial_state, initial_extra
def draw_uniform(samples, bins, desired_size):
    """Draw a uniform set of samples."""
    hist, bin_edges = np.histogram(samples, bins=bins)
    avg_nb = int(desired_size / float(bins))
    numbers = np.repeat(avg_nb, bins)
    for j in range(4):
        numbers[hist <= numbers] = hist[hist <= numbers]
        nb_rest = desired_size - np.sum(numbers[hist <= numbers])  # * bins
        avg_nb = round(nb_rest / np.sum(hist > numbers))
        numbers[hist > numbers] = avg_nb

    result = []
    count = 0
    for i in range(bin_edges.size - 1):
        ind = samples >= bin_edges[i]
        ind &= samples <= bin_edges[i + 1]
        if ind.sum() > 0:
            positions = np.where(ind)[0]
            nb = min([numbers[i], ind.sum()])
            result.append(np.random.choice(positions, nb, replace=False))

    return np.concatenate(result)
def __call__(self, param_name, param):
    """Shuffles the weight matrix/mask for a given parameter, per-neuron.

    This is to be used with mask_map, and accepts the standard mask_map
    function parameters.

    Args:
      param_name: The parameter's name.
      param: The parameter's weight or mask matrix.

    Returns:
      A shuffled weight/mask matrix, with each neuron shuffled independently.
    """
    del param_name  # Unused.
    neuron_length = functools.reduce(operator.mul, param.shape[:-1])
    neuron_mask = jnp.arange(neuron_length)
    neuron_mask = jnp.where(
        neuron_mask >= self._sparsity * neuron_mask.size,
        jnp.ones_like(neuron_mask), jnp.zeros_like(neuron_mask))
    mask = jnp.repeat(
        neuron_mask[Ellipsis, jnp.newaxis], param.shape[-1], axis=1)
    self._rng, rng_input = jax.random.split(self._rng)
    mask = jax.random.shuffle(rng_input, mask, axis=0)
    return mask.reshape(param.shape)
def sample_pdf(bins, weights, num_samples, rng, det):
    weights = weights + 1e-5
    pdf = weights / jnp.sum(weights, axis=-1, keepdims=True)
    cdf = jnp.cumsum(pdf, -1)
    cdf = jnp.concatenate((jnp.zeros_like(cdf[..., :1]), cdf), -1)

    if det:
        u = jnp.linspace(0.0, 1.0, num_samples)
        u = jnp.repeat(jnp.expand_dims(u, 0), cdf.shape[:-1], axis=0)
    else:
        u = jax.random.uniform(rng, list(cdf.shape[:-1]) + [num_samples])

    inds = vmap(
        lambda cdf_i, u_i: jnp.searchsorted(cdf_i, u_i, side="right").astype(
            np.int32))(cdf, u)

    below = jnp.maximum(0, inds - 1)
    above = jnp.minimum(cdf.shape[-1] - 1, inds)
    inds_g = jnp.stack((below, above), axis=-1)

    cdf_g = vmap(lambda cdf_i, inds_gi: cdf_i[inds_gi])(cdf, inds_g)
    bins_g = vmap(lambda bins_i, inds_gi: bins_i[inds_gi])(bins, inds_g)

    # don't know why we have to zero out the outliers?
    clean_inds = lambda arr, cutoff: jnp.where(inds_g < cutoff, arr, 0)
    cdf_g = clean_inds(cdf_g, cdf.shape[-1])
    bins_g = clean_inds(bins_g, bins.shape[-1])

    denom = cdf_g[..., 1] - cdf_g[..., 0]
    denom = jnp.where(denom < 1e-5, 1.0, denom)
    t = (u - cdf_g[..., 0]) / denom
    samples = bins_g[..., 0] + t * (bins_g[..., 1] - bins_g[..., 0])
    return samples
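# Minimal usage sketch for the inverse-transform sampling above (an added
# example; the shapes below are illustrative assumptions). Each row of `bins`
# has one more entry than the matching row of `weights`, lining up with the
# zero-padded CDF.
import numpy as np
import jax
import jax.numpy as jnp
from jax import vmap

bins = jnp.stack([jnp.linspace(0., 1., 5), jnp.linspace(2., 3., 5)])   # (2, 5)
weights = jnp.array([[1., 2., 3., 4.], [4., 3., 2., 1.]])               # (2, 4)
samples = sample_pdf(bins, weights, num_samples=8,
                     rng=jax.random.PRNGKey(0), det=False)
assert samples.shape == (2, 8)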
def test_initialize_model_change_point(init_strategy):
    def model(data):
        alpha = 1 / jnp.mean(data)
        lambda1 = numpyro.sample('lambda1', dist.Exponential(alpha))
        lambda2 = numpyro.sample('lambda2', dist.Exponential(alpha))
        tau = numpyro.sample('tau', dist.Uniform(0, 1))
        lambda12 = jnp.where(
            jnp.arange(len(data)) < tau * len(data), lambda1, lambda2)
        numpyro.sample('obs', dist.Poisson(lambda12), obs=data)

    count_data = jnp.array([
        13, 24, 8, 24, 7, 35, 14, 11, 15, 11, 22, 22, 11, 57, 11, 19, 29, 6,
        19, 12, 22, 12, 18, 72, 32, 9, 7, 13, 19, 23, 27, 20, 6, 17, 13, 10,
        14, 6, 16, 15, 7, 2, 15, 15, 19, 70, 49, 7, 53, 22, 21, 31, 19, 11,
        18, 20, 12, 35, 17, 23, 17, 4, 2, 31, 30, 13, 27, 0, 39, 37, 5, 14,
        13, 22,
    ])

    rng_keys = random.split(random.PRNGKey(1), 2)
    init_params, _, _, _ = initialize_model(
        rng_keys, model, init_strategy=init_strategy, model_args=(count_data,))
    if isinstance(init_strategy, partial) and init_strategy.func is init_to_value:
        expected = biject_to(constraints.unit_interval).inv(
            init_strategy.keywords.get('values')['tau'])
        assert_allclose(init_params[0]['tau'], jnp.repeat(expected, 2))
    for i in range(2):
        init_params_i, _, _, _ = initialize_model(
            rng_keys[i], model, init_strategy=init_strategy,
            model_args=(count_data,))
        for name, p in init_params[0].items():
            # XXX: the result is equal if we disable fast-math-mode
            assert_allclose(p[i], init_params_i[0][name], atol=1e-6)
def update(i, state, u):
    w, z, r, betapow = state
    z = jnp.concatenate((r2c(mimo(w, u)[None, :]), z[:-1, :]))
    z0 = jnp.repeat(z, dims, axis=-1)
    z1 = jnp.tile(z, (1, dims))
    rt = jax.vmap(
        lambda a, b: a[0] * b.conj(), in_axes=-1, out_axes=0)(z0, z1).reshape(r.shape)
    r = beta * r + (1 - beta) * rt  # exponential moving average
    rhat = r / (1 - betapow)  # bias correction due to small beta
    r_sqsum = jnp.sum(jnp.abs(rhat)**2, axis=-1)

    v = mimo(w, u)
    lcma = jnp.sum(jnp.abs(jnp.abs(v)**2 - R2)**2)
    lmu = 2 * (jnp.sum(r_sqsum) - jnp.sum(jnp.diag(r_sqsum)))
    gcma = 4 * (v * (jnp.abs(v)**2 - R2))[..., None, None] * jnp.conj(u).T[None, ...]
    gmu_tmp_full = (4 * rhat[..., None, None]
                    * z.T[None, ..., None, None]
                    * jnp.conj(u).T[None, None, None, ...])  # shape: [dims, dims, delta, dims, T]
    # reduce delta axis first
    gmu_tmp_dr = jnp.sum(gmu_tmp_full, axis=2)  # shape: [dims, dims, dims, T]
    # cross correlation = full correlation - self correlation
    gmu = jnp.sum(gmu_tmp_dr, axis=1) - gmu_tmp_dr[jnp.arange(dims), jnp.arange(dims), ...]
    l = lcma + lmu
    g = gcma + gmu

    out = (w, l)
    w = w - lr(i) * g
    betapow *= beta
    state = (w, z, r, betapow)
    return state, out
def sample_posterior_and_average(params, hps, key, x_txd, class_id,
                                 batch_size=None):
    """Get the denoised LFADS inferred values by posterior sampling and averaging.

    Args:
      params: dictionary of LFADS parameters
      hps: dict of LFADS hyperparameters
      key: JAX random state
      x_txd: 2d np.array time by dim trial to denoise
      class_id: one-hot encoding of the class of this example
      batch_size: number of samples; if None, use hps batch size

    Returns:
      LFADS dictionary of inferred values, averaged over randomness.
    """
    if batch_size is None:
        batch_size = hps['batch_size']
    keys = random.split(key, batch_size)
    x_bxtxd = np.repeat(np.expand_dims(x_txd, axis=0), batch_size, axis=0)
    class_id_b = class_id * np.ones((batch_size,)).astype(np.int32)
    keep_rate = 1.0
    use_mean = False
    lfads_dict = batch_forward_pass(params, hps, keys, x_bxtxd, class_id_b,
                                    keep_rate, use_mean)
    return utils.average_lfads_batch(lfads_dict)
def _sample_n_and_log_prob(self, key: PRNGKey, n: int) -> Tuple[Array, Array]:
    """See `Distribution._sample_n_and_log_prob`."""
    samples = self._sample_n(key, n)
    log_prob = -jnp.log(self.range)
    log_prob = jnp.repeat(log_prob[None], n, axis=0)
    return samples, log_prob
def finite_horizon_lqr(amat, bmat, x_goal, u_goal, qmat, rmat, horizon):
    n = x_goal.shape[0]
    m = u_goal.shape[0]
    F = np.concatenate((amat, bmat), axis=1)
    F = np.repeat(F[None, :, :], horizon, axis=0)
    f = np.zeros((horizon, n))
    C = np.zeros((n + m, n + m))
    C = ops.index_update(C, ops.index[:n, :n], qmat)
    C = ops.index_update(C, ops.index[n:, n:], rmat)
    C = np.repeat(C[None, :, :], horizon, axis=0)
    c = np.zeros((horizon, n + m))
    K, k = backwards_recursion(C, c, F, f)
    return K[0], k[0]
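# Layout note (added for clarity; `backwards_recursion` is defined elsewhere in
# this module). With the stacked variable z_t = [x_t; u_t], the arrays built
# above describe the per-step dynamics and quadratic cost
#   x_{t+1} = F z_t + f,  F = [A  B],
#   cost_t  = 1/2 z_t^T C z_t + c^T z_t,  C = [[Q, 0], [0, R]],
# so qmat fills the top-left n x n block of C and rmat the bottom-right m x m
# block, both repeated `horizon` times for the backward recursion.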
def conway_graph(size) -> jraph.GraphsTuple:
    """Returns a graph representing the game field of conway's game of life."""
    # Creates nodes: each node represents a cell in the game.
    n_node = size**2
    nodes = np.zeros((n_node, 1))
    node_indices = jnp.arange(n_node)
    # Creates edges, senders and receivers:
    # the senders represent the connections to the 8 neighboring fields.
    n_edge = 8 * n_node
    edges = jnp.zeros((n_edge, 1))
    senders = jnp.vstack([
        node_indices - size - 1, node_indices - size, node_indices - size + 1,
        node_indices - 1, node_indices + 1,
        node_indices + size - 1, node_indices + size, node_indices + size + 1
    ])
    senders = senders.T.reshape(-1)
    senders = (senders + size**2) % size**2
    receivers = jnp.repeat(node_indices, 8)
    # Adds a glider to the game.
    nodes[0, 0] = 1.0
    nodes[1, 0] = 1.0
    nodes[2, 0] = 1.0
    nodes[2 + size, 0] = 1.0
    nodes[1 + 2 * size, 0] = 1.0
    return jraph.GraphsTuple(
        n_node=jnp.array([n_node]),
        n_edge=jnp.array([n_edge]),
        nodes=jnp.asarray(nodes),
        edges=edges,
        globals=None,
        senders=senders,
        receivers=receivers)
def compute_advantage(params, critic_fn, rewards, inputs):
    """Compute the advantage: difference between rewards and predicted value.

    Args:
      params: parameters for the critic neural net
      critic_fn: function to run critic neural net
      rewards: rewards for the perturbed samples
      inputs: original samples, used as input to the Jax model

    Returns:
      advantage: [batch_size x num_mutations]
    """
    assert inputs.ndim == 4
    num_mutations, batch_size, str_length, vocab_size = inputs.shape
    inputs_reshaped = inputs.reshape(
        (num_mutations * batch_size, str_length, vocab_size))
    predicted_value = critic_fn(inputs_reshaped, params, mode="train")
    assert predicted_value.shape == (num_mutations * batch_size, 1)
    predicted_value = predicted_value.reshape((num_mutations, batch_size))

    assert rewards.shape == (batch_size,)
    rewards = jnp.repeat(rewards[None, :], num_mutations, 0)
    assert rewards.shape == (num_mutations, batch_size)

    advantage = rewards - predicted_value
    advantage = jnp.transpose(advantage)
    assert advantage.shape == (batch_size, num_mutations)
    return advantage
def _get_weights(params, k, ref_row, method):
    '''Reshapes the given params (weights) into the full matrix including 0.'''
    if method in ['Full', None]:
        raw_weights = params.reshape(-1, k + 1)
        # weights = jax_np.zeros([k, k+1])
        # weights[:-1, :] = params.reshape(-1, k + 1)
    elif method == 'Diag':
        raw_weights = jax_np.hstack(
            [jax_np.diag(params[:k]), params[k:].reshape(-1, 1)])
        # weights[:, :-1][jax_np.diag_indices(k)] = params[:]
    elif method == 'FixDiag':
        raw_weights = jax_np.hstack(
            [jax_np.eye(k) * params[0], jax_np.zeros((k, 1))])
        # weights[jax_np.diag_indices(k - 1)] = params[0]
        # weights[jax_np.diag_indices(k)] = params[0]
    else:
        raise ValueError("Unknown calibration method {}".format(method))

    if ref_row:
        weights = raw_weights - jax_np.repeat(
            raw_weights[-1, :].reshape(1, -1), k, axis=0)
    else:
        weights = raw_weights

    return weights
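# Minimal usage sketch (an added example; it assumes `jax.numpy` is aliased as
# `jax_np`, matching the name used above). With method='Full' and ref_row=True
# the last row acts as the reference class, so it maps to all zeros after the
# subtraction.
import jax.numpy as jax_np

k = 3
params = jax_np.arange(float(k * (k + 1)))
weights = _get_weights(params, k, ref_row=True, method='Full')
assert weights.shape == (k, k + 1)
assert (weights[-1] == 0).all()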
def grad_eigh(w, v, wg, vg):
    """Gradient for eigenvalues and vectors of a symmetric matrix.

    Parameters
    ----------
    w: eigenvalues
    v: eigenvectors
    wg: adjoint eigenvalues
    vg: adjoint eigenvectors
    """
    vc = v  # real
    N = 3
    # wg, vg = g
    # Gradient w.r.t. eigenvalues, eigenvectors.
    w_repeated = np.repeat(w[..., np.newaxis], N, axis=-1)
    # Eigenvalue part
    vjp_temp = np.dot(vc * wg[..., np.newaxis, :], v.T)

    # Add eigenvector part only if non-zero backward signal is present.
    # This can avoid NaN results for degenerate cases if the function depends
    # on the eigenvalues only.
    if np.any(vg):
        off_diag = np.ones((N, N)) - np.eye(N)
        F = off_diag / (w_repeated.T - w_repeated + np.eye(N))
        vjp_temp += np.dot(np.dot(vc, F * np.dot(v.T, vg)), v.T)
    else:
        assert 0

    off_diag_mask = (onp.ones((3, 3)) - onp.eye(3)) / 2
    return (vjp_temp * np.eye(vjp_temp.shape[-1])
            + (vjp_temp + vjp_temp.T) * off_diag_mask)
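# Reference for the formula implemented above (an added note, not original
# code): for a symmetric A = V diag(w) V^T with adjoints w_bar (wg) and
# V_bar (vg), the standard reverse-mode rule is
#   A_bar_raw = V (diag(w_bar) + F * (V^T V_bar)) V^T,
#   F_ij = 1 / (w_j - w_i) for i != j, F_ii = 0,
# where * is the elementwise product. Because A is constrained to be symmetric,
# the result is symmetrised by keeping the diagonal of A_bar_raw and averaging
# the off-diagonal part with its transpose, which is exactly what the final
# masking in the return statement computes.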