import numpy as np  # assumed backend; in Trax this is a jax.numpy-compatible module


def RepresentationMask(mask, serializer, **unused_kwargs):
  """Upsamples a mask to cover the serialized representation."""
  # Trax enforces the mask to be of the same size as the target. Get rid of
  # the extra dimensions, keeping only (batch_size, length).
  mask = np.amax(mask, axis=tuple(range(2, mask.ndim)))
  # Repeat each position `serializer.representation_length` times along a
  # new trailing axis.
  return np.broadcast_to(
      mask[:, :, None],
      mask.shape + (serializer.representation_length,))
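# A minimal usage sketch for RepresentationMask. `FakeSerializer` is a
# stand-in introduced only for illustration; the real serializer just needs
# to expose a `representation_length` attribute.
import collections

FakeSerializer = collections.namedtuple(
    'FakeSerializer', ['representation_length'])

mask = np.ones((2, 5, 1))  # (batch_size, length, extra dim enforced by Trax)
upsampled = RepresentationMask(mask, FakeSerializer(representation_length=4))
assert upsampled.shape == (2, 5, 4)  # each position covers 4 repr symbols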
def _update_sketched(self, grads, weights, m, v, opt_params):
  """Update for higher-rank parameters."""
  learning_rate = opt_params['learning_rate']
  momentum = opt_params['momentum']
  shape = weights.shape
  rank = len(shape)
  # Broadcast each per-dimension accumulator v[i] against the full parameter
  # shape, and take their elementwise minimum as the current second moment.
  reshaped_accumulators = [
      np.reshape(v[i], self._expanded_shape(shape, i)) for i in range(rank)
  ]
  current_accumulator = self._minimum(reshaped_accumulators)
  current_accumulator += grads * grads
  # Precondition the gradient by 1 / sqrt(accumulator), guarding against
  # division by zero where nothing has accumulated yet.
  accumulator_inv_sqrt = np.where(
      current_accumulator > 0.0,
      1.0 / np.sqrt(current_accumulator),
      np.zeros_like(current_accumulator))
  preconditioned_gradient = grads * accumulator_inv_sqrt
  m = (1.0 - momentum) * preconditioned_gradient + momentum * m
  weights = weights - (learning_rate * m).astype(weights.dtype)
  # Fold the full accumulator back into the per-dimension sketches by
  # maximizing over all other axes.
  for i in range(len(v)):
    axes = list(range(i)) + list(range(i + 1, rank))
    v[i] = np.amax(current_accumulator, axis=axes)
  return weights, (m, v)
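# A hedged sketch of the two helpers _update_sketched relies on, following
# the SM3 scheme of keeping one accumulator row per tensor dimension. The
# class name and exact bodies are assumptions for illustration, not the
# library's actual implementation.
class _SM3HelpersSketch:

  def _expanded_shape(self, shape, axis):
    # Replace every entry of `shape` with 1 except at `axis`, e.g.
    # (M, N, K) with axis=1 -> (1, N, 1), so that v[axis] broadcasts
    # against the full parameter shape.
    return tuple(d if i == axis else 1 for i, d in enumerate(shape))

  def _minimum(self, tensor_list):
    # Elementwise minimum across the broadcast per-dimension accumulators.
    # Each v[i] stores a per-dimension maximum, so this minimum is the
    # tightest available upper bound on the true accumulated squared grads.
    minimum = tensor_list[0]
    for t in tensor_list[1:]:
      minimum = np.minimum(minimum, t)
    return minimum

# Storing one accumulator per dimension costs memory proportional to the sum
# of the dimensions rather than their product, which is the point of the
# sketched update.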
import jax.numpy as jnp  # assumed; the source does not show this import


def representation_mask(mask):
  """Upsamples a mask to cover the serialized representation.

  `serializer` is a free variable here: this function is written as a
  closure over an enclosing scope that provides it.
  """
  # Collapse any extra trailing dimensions down to (batch_size, length).
  mask = jnp.amax(mask, axis=tuple(range(2, mask.ndim)))
  # Repeat each position `serializer.representation_length` times.
  return jnp.broadcast_to(
      mask[:, :, None],
      mask.shape + (serializer.representation_length,))
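# Usage sketch for the closure-style variant. For a quick check, a
# module-level `serializer` satisfies the free-variable lookup;
# `types.SimpleNamespace` stands in for the real serializer object, and only
# the `representation_length` attribute is assumed.
import types

serializer = types.SimpleNamespace(representation_length=3)

mask = jnp.ones((2, 5, 1))
upsampled = representation_mask(mask)
assert upsampled.shape == (2, 5, 3)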