def evaluate_net(*states):
    activations = T.fvectors(len(weights))
    idx = 0
    for neurons, activator, isInput, isOutput, weightFrame in weights:
        sumParts = []
        for i, info in enumerate(weightFrame):
            srcIdx, w = info
            sumParts.append(T.dot(states[srcIdx], w.transpose()))

        if len(sumParts):
            sumParts = T.stack(*sumParts)
            activity = T.sum(sumParts, axis=0)

            if activator == TIDENTITY:
                activation = activity
            elif activator == TLOGISTIC:
                activation = 1. / (1. + T.exp(-activity))
            elif activator == THYPERBOLIC:
                activation = T.tanh(activity)
            elif activator == TTHRESHOLD:
                activation = T.sgn(activity)
            elif activator == TBIAS:
                activation = T.ones_like(activity, dtype='float32')
            elif activator == TRADIAL:
                activation = T.exp(-activity * activity / 2.0)
            else:
                raise Exception(
                    "Unknown activation function for layer {0}".format(idx))
        else:
            activation = T.zeros_like(states[idx])  # states[idx]

        activations[idx] = activation
        idx += 1

    checklist = [T.all(T.eq(a, s)) for a, s in zip(activations, states)]
    condition = T.all(T.as_tensor_variable(checklist))
    return activations, {}, theano.scan_module.until(condition)
def logp(self, x):
    n = self.n
    p = self.p
    # only defined for sum(p) == 1
    return bound(
        factln(n) + tt.sum(x * tt.log(p) - factln(x)),
        tt.all(x >= 0),
        tt.all(x <= n),
        tt.eq(tt.sum(x), n),
        n >= 0)
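# A minimal standalone sketch (illustration only, not the library's bound
# helper) of the pattern above: compute the multinomial log-likelihood and
# switch it to -inf whenever any support condition fails. The helper names
# factln and multinomial_logp are made up for this example.
import numpy as np
import theano
import theano.tensor as tt


def factln(k):
    # ln(k!) expressed through the log-gamma function
    return tt.gammaln(k + 1)


def multinomial_logp(x, n, p):
    logp = factln(n) + tt.sum(x * tt.log(p) - factln(x))
    ok = tt.all(tt.stack([tt.all(x >= 0), tt.all(x <= n),
                          tt.eq(tt.sum(x), n), n >= 0]))
    return tt.switch(ok, logp, -np.inf)


x = tt.dvector("x")
n = tt.dscalar("n")
p = tt.dvector("p")
f = theano.function([x, n, p], multinomial_logp(x, n, p))
# f([2., 3., 5.], 10., [0.2, 0.3, 0.5]) is finite;
# f([2., 3., 6.], 10., [0.2, 0.3, 0.5]) returns -inf (counts do not sum to n).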
def logp(self, x):
    n = self.n
    p = self.p
    s = self.s
    if s > 1:
        X = self._x_creation(x)
        result = self._normalizing_constant(n, p, s) + self._results_inner(n, x)
        return pm.dist_math.bound(result,
                                  tt.all(X <= 1), tt.all(X >= -1),
                                  self._check_pos_def(x),
                                  n > 0)
    else:
        X = x[self.tri_index]
        X = tt.fill_diagonal(X, 1)
        result = self._normalizing_constant(n, p, s)
        # n-1 probably needs to become structure[0]-1;
        # I don't really know the likelihood structure honestly
        result += (n - 1.) * tt.log(tt.nlinalg.det(X))
        return pm.dist_math.bound(result,
                                  tt.all(X <= 1), tt.all(X >= -1),
                                  matrix_pos_def(X),
                                  n > 0)
def logp(self, value):
    p_ = self.p
    k = self.k

    # Clip values before using them for indexing
    value_clip = tt.clip(value, 0, k - 1)

    # We must only check that the values sum to 1 if p comes from a
    # tensor variable, i.e. when p is a step_method proposal. In the other
    # cases we normalize ourselves
    if not isinstance(p_, (numbers.Number, np.ndarray, tt.TensorConstant,
                           tt.sharedvar.SharedVariable)):
        sumto1 = theano.gradient.zero_grad(
            tt.le(abs(tt.sum(p_, axis=-1) - 1), 1e-5))
        p = p_
    else:
        p = p_ / tt.sum(p_, axis=-1, keepdims=True)
        sumto1 = True

    if p.ndim > 1:
        a = tt.log(np.moveaxis(p, -1, 0)[value_clip])
    else:
        a = tt.log(p[value_clip])

    return bound(a, value >= 0, value <= (k - 1), sumto1,
                 tt.all(p_ > 0, axis=-1), tt.all(p <= 1, axis=-1))
def logp(self, x):
    n = self.n
    p = self.p
    # only defined for sum(p) == 1
    return bound(
        factln(n) + T.sum(x * T.log(p) - factln(x)),
        T.all(x >= 0),
        T.all(x <= n),
        T.eq(T.sum(x), n),
        n >= 0)
def logp(self, weights):
    """
    Calculate log-probability of the given set of weights.

    Parameters
    ----------
    weights : 1-D array of numeric values
        Set of weights for which log-probability is calculated.

    Returns
    -------
    TensorVariable
    """
    k = self.shape
    a = self.a

    wts = tt.as_tensor_variable(weights)
    wt = wts[:-1]
    wt_sum = tt.extra_ops.cumsum(wt)
    denom = 1 - wt_sum
    denom_shift = tt.concatenate([[1.], denom])
    betas = wts / denom_shift

    Beta_ = pm.Beta.dist(1, a)
    logp = Beta_.logp(betas).sum()
    return bound(logp,
                 tt.all(betas > 0),
                 tt.all(weights > 0),
                 wt_sum < 1,
                 wt_sum > 0,
                 tt.all(denom > 0))
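# A small NumPy sketch of the stick-breaking construction that the logp
# above inverts (illustration only; stick_breaking_weights is a made-up
# helper, not part of the class): each weight is a Beta(1, a) fraction of
# whatever stick length remains after the previous breaks.
import numpy as np


def stick_breaking_weights(betas):
    """Map Beta(1, a) fractions to weights that sum to strictly less than 1."""
    remaining = np.concatenate([[1.0], np.cumprod(1.0 - betas)[:-1]])
    return betas * remaining


rng = np.random.RandomState(0)
betas = rng.beta(1.0, 5.0, size=10)
weights = stick_breaking_weights(betas)
print(weights.sum())  # close to, but below, 1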
def logp(self, x):
    n = self.n
    p = self.p
    # only defined for sum(p) == 1
    return bound(
        factln(n) + sum(x * log(p) - factln(x)),
        n > 0,
        eq(sum(x), n),
        all(0 <= x), all(x <= n))
def logp(self, value):
    k = self.k
    a = self.a
    # only defined for sum(value) == 1
    return bound(
        sum(logpow(value, a - 1) - gammaln(a), axis=0) + gammaln(sum(a)),
        k > 1,
        all(a > 0),
        all(value >= 0), all(value <= 1))
def logp(self, value):
    k = self.k
    a = self.a
    # only defined for sum(value) == 1
    return bound(
        tt.sum(logpow(value, a - 1) - gammaln(a), axis=-1)
        + gammaln(tt.sum(a, axis=-1)),
        tt.all(value >= 0),
        tt.all(value <= 1),
        k > 1,
        tt.all(a > 0))
def logp(self, x):
    n = self.n
    p = self.p

    return bound(
        tt.sum(factln(n)) - tt.sum(factln(x)) + tt.sum(x * tt.log(p)),
        tt.all(x >= 0),
        tt.all(tt.eq(tt.sum(x, axis=-1, keepdims=True), n)),
        tt.all(p <= 1),
        tt.all(tt.eq(tt.sum(p, axis=-1), 1)),
        tt.all(tt.ge(n, 0)))
def logp(self, x):
    n = self.n
    p = self.p
    # only defined for sum(p) == 1
    return bound(
        factln(n) + sum(x * log(p) - factln(x)),
        n >= 0,
        eq(sum(x), n),
        all(0 <= x), all(x <= n))
def logp(self, x):
    n = self.n
    p = self.p

    X = x[self.tri_index]
    X = T.fill_diagonal(X, 1)

    result = self._normalizing_constant(n, p)
    result += (n - 1.) * T.log(det(X))
    return bound(result,
                 T.all(X <= 1), T.all(X >= -1),
                 n > 0)
def logp(self, value):
    n = self.n
    p = self.p

    return bound(
        factln(n) - factln(value).sum() + (value * tt.log(p)).sum(),
        tt.all(value >= 0),
        tt.all(0 <= p), tt.all(p <= 1),
        tt.isclose(p.sum(), 1),
        broadcast_conditions=False)
def logp(self, x):
    n = self.n
    p = self.p

    X = x[self.tri_index]
    X = t.fill_diagonal(X, 1)

    result = self._normalizing_constant(n, p)
    result += (n - 1.0) * log(det(X))
    return bound(result,
                 n > 0,
                 all(le(X, 1)), all(ge(X, -1)))
def logp(self, x):
    n = self.n
    p = self.p

    X = x[self.tri_index]
    X = T.fill_diagonal(X, 1)

    result = self._normalizing_constant(n, p)
    result += (n - 1.0) * T.log(det(X))
    return bound(result,
                 T.all(X <= 1), T.all(X >= -1),
                 n > 0)
def logp(self, x):
    n = self.n
    p = self.p

    X = x[self.tri_index]
    X = t.fill_diagonal(X, 1)

    result = self._normalizing_constant(n, p)
    result += (n - 1.) * log(det(X))
    return bound(result,
                 n > 0,
                 all(le(X, 1)), all(ge(X, -1)))
def logp(self, value):
    k = self.k
    a = self.a
    # only defined for sum(value) == 1
    return bound(
        tt.sum(logpow(value, a - 1) - gammaln(a), axis=-1)
        + gammaln(tt.sum(a, axis=-1)),
        tt.all(value >= 0),
        tt.all(value <= 1),
        k > 1,
        tt.all(a > 0),
        broadcast_conditions=False)
def logp(self, x):
    n = self.n
    p = self.p

    X = x[self.tri_index]
    X = tt.fill_diagonal(X, 1)

    result = self._normalizing_constant(n, p)
    result += (n - 1.) * tt.log(det(X))
    return bound(result,
                 tt.all(X <= 1), tt.all(X >= -1),
                 matrix_pos_def(X),
                 n > 0)
def in_transit(self, t, r=0.0, texp=None):
    """Get a list of timestamps that are in transit

    Args:
        t (vector): A vector of timestamps to be evaluated.
        r (Optional): The radii of the planets.
        texp (Optional[float]): The exposure time.

    Returns:
        The indices of the timestamps that are in transit.

    """
    z = tt.zeros_like(self.a)
    r = tt.as_tensor_variable(r) + z
    R = self.r_star + z

    # Wrap the times into time since transit
    hp = 0.5 * self.period
    dt = tt.mod(self._warp_times(t) - self.t0 + hp, self.period) - hp

    if self.ecc is None:
        # Equation 14 from Winn (2010)
        k = r / self.r_star
        arg = tt.square(1 + k) - tt.square(self.b)
        hdur = hp * tt.arcsin(self.r_star / self.a *
                              tt.sqrt(arg) / self.sin_incl) / np.pi
        t_start = -hdur
        t_end = hdur
        flag = z
    else:
        M_contact = self.contact_points_op(
            self.a, self.ecc, self.cos_omega, self.sin_omega,
            self.cos_incl + z, self.sin_incl + z, R + r)
        flag = M_contact[2]

        t_start = (M_contact[0] - self.M0) / self.n
        t_start = tt.mod(t_start + hp, self.period) - hp

        t_end = (M_contact[1] - self.M0) / self.n
        t_end = tt.mod(t_end + hp, self.period) - hp

    if texp is not None:
        t_start -= 0.5 * texp
        t_end += 0.5 * texp

    mask = tt.any(tt.and_(dt >= t_start, dt <= t_end), axis=-1)
    result = ifelse(
        tt.and_(tt.all(tt.eq(flag, 0)), tt.all(tt.gt(t_end, t_start))),
        tt.arange(t.size)[mask],
        tt.arange(t.size))

    return result
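# Hedged NumPy sketch of the time-folding step above: absolute timestamps
# are wrapped into "time since the nearest transit" in [-period/2, period/2).
# The values of period and t0 below are made up for illustration.
import numpy as np

period, t0 = 3.5, 1.2            # days
t = np.linspace(0.0, 20.0, 9)    # observation times

hp = 0.5 * period
dt = np.mod(t - t0 + hp, period) - hp
# |dt| <= half-duration then flags the in-transit samples, as in the mask above.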
def logp(self, x):
    n = self.n
    p = self.p
    if x.ndim == 2:
        x_sum = x.sum(axis=0)
        n_sum = n * x.shape[0]
    else:
        x_sum = x
        n_sum = n
    return bound(
        factln(n_sum) + tt.sum(x_sum * tt.log(p) - factln(x_sum)),
        tt.all(x >= 0),
        tt.all(x <= n),
        tt.eq(tt.sum(x_sum), n_sum),
        tt.all(p <= 1),
        tt.eq(p.sum(), 1),
        n >= 0)
def logp(self, x):
    n = self.n
    p = self.p
    s = self.s

    # X = x[self.tri_index]        # need to correct
    # X = tt.fill_diagonal(X, 1)   # need to correct
    X = self._x_creation(x)

    result = self._normalizing_constant(n, p, s) + self._results_inner(n, x)
    # result += (n - 1.) * T.log(det(X))
    # n-1 probably needs to become structure[0]-1
    return pm.dist_math.bound(result,
                              tt.all(X <= 1), tt.all(X >= -1),
                              n > 0)
def logp(self, x):
    n = self.n
    eta = self.eta

    X = x[self.tri_index]
    X = tt.fill_diagonal(X, 1)

    result = _lkj_normalizing_constant(eta, n)
    result += (eta - 1.) * tt.log(det(X))
    return bound(result,
                 tt.all(X <= 1), tt.all(X >= -1),
                 matrix_pos_def(X),
                 eta > 0,
                 broadcast_conditions=False)
def compute_weights(self, energies, attended_mask):
    """Overrides ``SequenceContentAttention.compute_weights()``.

    Instead of a normal softmax, it sets most of the energies to zero,
    resulting in sharp attention. If ``self.nbest`` equals 1, the
    thresholded attention always sets its full attention to one single
    source annotation.

    Args:
        energies (Variable): Energies computed by the energy_computer
        attended_mask (Variable): Source sentence mask

    Returns:
        Variable. Thresholded alignment weights
    """
    # Stabilize energies first and then exponentiate
    energies = energies - energies.max(axis=0)
    unnormalized_weights = tensor.exp(energies)
    if attended_mask:
        unnormalized_weights *= attended_mask

    # Set everything to zero except the ``nbest`` best entries
    best_energies = unnormalized_weights.sort(axis=0)[-self.nbest:]
    min_energy = best_energies[0]
    thresholded_weights = tensor.switch(
        unnormalized_weights >= min_energy, unnormalized_weights, 0.0)

    # If mask consists of all zeros use 1 as the normalization coefficient
    normalization = (thresholded_weights.sum(axis=0) +
                     tensor.all(1 - attended_mask, axis=0))
    return thresholded_weights / normalization
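# NumPy illustration (not the brick itself) of the n-best thresholding above:
# keep the nbest largest exponentiated energies per column, zero the rest,
# and renormalize so the surviving weights sum to one.
import numpy as np

nbest = 2
energies = np.array([[1.0, 0.2], [0.5, 2.0], [3.0, 1.0]])   # (src_length, batch)

weights = np.exp(energies - energies.max(axis=0))
min_kept = np.sort(weights, axis=0)[-nbest:][0]             # nbest-th largest per column
thresholded = np.where(weights >= min_kept, weights, 0.0)
sharp = thresholded / thresholded.sum(axis=0)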
def compile(self, input_placeholder, label_placeholder, loss, optimizer):
    x = input_placeholder
    for k in range(self.num_layers):
        x = self.layer_list[k].forward(x)
    self.loss = loss.forward(x, label_placeholder)
    self.updates = optimizer.get_updates(self.loss, self.params)
    self.accuracy = T.mean(
        T.eq(T.argmax(x, axis=-1), T.argmax(label_placeholder, axis=-1)))
    self.equation_accuracy = T.all(
        T.eq(T.argmax(x, axis=-1), T.argmax(label_placeholder, axis=-1)))

    LOG_INFO('start compiling model...')
    self.train = theano.function(
        inputs=[input_placeholder, label_placeholder],
        outputs=[self.loss, self.accuracy],
        updates=self.updates,
        allow_input_downcast=True)
    self.test = theano.function(
        inputs=[input_placeholder, label_placeholder],
        outputs=[self.accuracy, self.equation_accuracy, self.loss],
        allow_input_downcast=True)
    self.predict = theano.function(
        inputs=[input_placeholder],
        outputs=[x],
        allow_input_downcast=True)
    LOG_INFO('model compilation done!')
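# Quick sketch of the two metrics built above: `accuracy` averages per-sample
# matches, while `equation_accuracy` is 1 only if every sample in the batch is
# predicted correctly (T.all over the matches). Standalone illustration only.
import theano
import theano.tensor as T

pred = T.imatrix("pred")
label = T.imatrix("label")
matches = T.eq(T.argmax(pred, axis=-1), T.argmax(label, axis=-1))
metrics = theano.function([pred, label], [T.mean(matches), T.all(matches)],
                          allow_input_downcast=True)
# metrics([[0, 1], [1, 0]], [[0, 1], [0, 1]]) -> mean 0.5, all 0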
def __init__(self, a, transform=transforms.stick_breaking,
             *args, **kwargs):
    super(Dirichlet, self).__init__(transform=transform, *args, **kwargs)
    self.a = a
    self.k = a.shape[0]
    self.mean = a / sum(a)

    self.mode = switch(all(a > 1),
                       (a - 1) / sum(a - 1),
                       nan)
def compute_weights(self, energies, attended_mask):
    """Compute weights from energies in softmax-like fashion.

    .. todo ::

        Use :class:`~blocks.bricks.Softmax`.

    Parameters
    ----------
    energies : :class:`~theano.Variable`
        The energies. Must be of the same shape as the mask.
    attended_mask : :class:`~theano.Variable`
        The mask for the attended. The index in the sequence must be
        the first dimension.

    Returns
    -------
    weights : :class:`~theano.Variable`
        Non-negative weights of the same shape as `energies`, summing to 1.

    """
    # Stabilize energies first and then exponentiate
    energies = energies - energies.max(axis=0)
    unnormalized_weights = tensor.exp(energies)
    if attended_mask:
        unnormalized_weights *= attended_mask

    # If mask consists of all zeros use 1 as the normalization coefficient
    normalization = (unnormalized_weights.sum(axis=0) +
                     tensor.all(1 - attended_mask, axis=0))
    return unnormalized_weights / normalization
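# NumPy sketch of the masked, numerically stabilized softmax above, including
# the guard that adds 1 to the denominator when a column's mask is all zeros
# (otherwise that column would divide by zero). Values are made up.
import numpy as np

energies = np.array([[0.1, 2.0], [1.5, -0.3], [0.7, 0.0]])   # (seq_len, batch)
mask = np.array([[1.0, 0.0], [1.0, 0.0], [0.0, 0.0]])        # second column fully masked

unnormalized = np.exp(energies - energies.max(axis=0)) * mask
normalization = unnormalized.sum(axis=0) + np.all(1 - mask, axis=0)
weights = unnormalized / normalization    # masked column stays all zeros instead of NaN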
def dlogp(inputs, gradients):
    g_logp, = gradients
    cov, delta = inputs
    g_logp.tag.test_value = floatX(1.)
    n, k = delta.shape

    chol_cov = cholesky(cov)
    diag = tt.nlinalg.diag(chol_cov)
    ok = tt.all(diag > 0)

    chol_cov = tt.switch(ok, chol_cov, tt.fill(chol_cov, 1))
    delta_trans = solve_lower(chol_cov, delta.T).T

    inner = n * tt.eye(k) - tt.dot(delta_trans.T, delta_trans)
    g_cov = solve_upper(chol_cov.T, inner)
    g_cov = solve_upper(chol_cov.T, g_cov.T)

    tau_delta = solve_upper(chol_cov.T, delta_trans.T)
    g_delta = tau_delta.T

    g_cov = tt.switch(ok, g_cov, -np.nan)
    g_delta = tt.switch(ok, g_delta, -np.nan)

    return [-0.5 * g_cov * g_logp, -g_delta * g_logp]
def theano(self, x, mu, V, ndim, ncomp):
    cholesky = Cholesky(nofail=True, lower=True)
    solve_lower = tt.slinalg.Solve(A_structure="lower_triangular")

    if x.ndim == 1:
        onedim = True
        x = x[None, :]
    else:
        onedim = False

    delta = x[:, None, :] - mu[None, ...]

    logps = []
    for i in range(ncomp):
        _chol_cov = cholesky(V[i])
        k = floatX(ndim)
        diag = tt.nlinalg.diag(_chol_cov)
        # Check if the covariance matrix is positive definite.
        ok = tt.all(diag > 0)
        # If not, replace the diagonal. We return -inf later, but
        # need to prevent solve_lower from throwing an exception.
        chol_cov = tt.switch(ok, _chol_cov, 1)

        delta_trans = solve_lower(chol_cov, delta[:, i].T).T
        _quaddist = (delta_trans ** 2).sum(axis=-1)
        logdet = tt.sum(tt.log(diag))
        if onedim:
            quaddist = _quaddist[0]
        else:
            quaddist = _quaddist
        norm = -0.5 * k * floatX(np.log(2 * np.pi))
        logp = norm - 0.5 * quaddist - logdet
        safe_logp = tt.switch(alltrue_elemwise([ok]), logp, -np.inf)  # safe logp (-inf for invalid)
        logps.append(safe_logp)

    return tt.stacklists(logps).T
def __init__(self, a, *args, **kwargs):
    super(Dirichlet, self).__init__(*args, **kwargs)
    self.a = a
    self.k = a.shape[0]
    self.mean = a / sum(a)

    self.mode = switch(all(a > 1),
                       (a - 1) / sum(a - 1),
                       nan)
def logp(self, value):
    p_ = self.p
    k = self.k

    # Clip values before using them for indexing
    value_clip = tt.clip(value, 0, k - 1)

    p = p_ / tt.sum(p_, axis=-1, keepdims=True)

    if p.ndim > 1:
        pattern = (p.ndim - 1,) + tuple(range(p.ndim - 1))
        a = tt.log(p.dimshuffle(pattern)[value_clip])
    else:
        a = tt.log(p[value_clip])

    return bound(a, value >= 0, value <= (k - 1),
                 tt.all(p_ >= 0, axis=-1), tt.all(p <= 1, axis=-1))
def adapt_step(dt, accept_prob, pos, mom, energy, energy_grad, k_energy):
    dt = tt.switch(tt.gt(accept_prob ** sign, 2. ** (-sign)),
                   (2. ** sign) * dt, dt)
    accept_prob = leapfrog_accept_prob(dt, pos, mom, energy,
                                       energy_grad, k_energy)
    return (dt, accept_prob), th.scan_module.until(
        tt.all(tt.le(accept_prob ** sign, 2. ** (-sign))))
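# Tiny standalone example of the scan + until pattern used above (a made-up
# doubling loop, not the step-size adaptation itself): theano.scan stops as
# soon as the until condition evaluates to true.
import theano
import theano.tensor as tt


def double_until(prev, limit):
    new = prev * 2.0
    return new, theano.scan_module.until(tt.all(new > limit))


start = tt.dscalar("start")
limit = tt.dscalar("limit")
values, _ = theano.scan(double_until, outputs_info=start,
                        non_sequences=limit, n_steps=50)
f = theano.function([start, limit], values)
# f(1.0, 100.0) -> [2, 4, ..., 128]; the loop exits after 7 steps, not 50.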
def logp(self, value):
    p_ = self.p
    k = self.k

    # Clip values before using them for indexing
    value_clip = tt.clip(value, 0, k - 1)

    p = p_ / tt.sum(p_, axis=-1, keepdims=True)

    if p.ndim > 1:
        pattern = (p.ndim - 1, ) + tuple(range(p.ndim - 1))
        a = tt.log(p.dimshuffle(pattern)[value_clip])
    else:
        a = tt.log(p[value_clip])

    return bound(a, value >= 0, value <= (k - 1),
                 tt.all(p_ >= 0, axis=-1), tt.all(p <= 1, axis=-1))
def negative_log_likelihood(actual, target):
    """
    :param actual: An (n_samples, n_labels) tensor where rows are normalized and
        actual[i, j] indicates the belief that on sample[i] the correct target is j.
    :param target: An (n_samples, ) tensor indicating the target label for each sample.
    :return: The average (over samples) of the negative log-likelihood.
    """
    # Data must be normalized along axis 1.
    actual = tt.opt.assert_(actual, tt.all(abs(actual.sum(axis=1) - 1) < 1e-7))
    return negative_log_likelihood_dangerous(actual, target)
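# Hedged sketch of the Theano Assert op used above: it returns its first
# argument unchanged but raises at runtime when the condition is false, so
# the normalization check travels with the graph. Standalone illustration.
import numpy as np
import theano
import theano.tensor as tt
from theano.tensor.opt import Assert

assert_normalized = Assert("rows must sum to 1")
probs = tt.dmatrix("probs")
checked = assert_normalized(probs, tt.all(abs(probs.sum(axis=1) - 1) < 1e-7))
f = theano.function([probs], checked.sum())

f(np.array([[0.5, 0.5], [0.2, 0.8]]))    # passes
# f(np.array([[0.5, 0.6]]))              # raises AssertionError at runtime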
def in_transit(self, t, r=0.0, texp=None):
    """Get a list of timestamps that are in transit

    Args:
        t (vector): A vector of timestamps to be evaluated.
        r (Optional): The radii of the planets.
        texp (Optional[float]): The exposure time.

    Returns:
        The indices of the timestamps that are in transit.

    """
    z = tt.zeros_like(self.a)
    r = tt.as_tensor_variable(r) + z
    R = self.r_star + z

    # Wrap the times into time since transit
    hp = 0.5 * self.period
    dt = tt.mod(self._warp_times(t) - self.t0 + hp, self.period) - hp

    if self.ecc is None:
        # Equation 14 from Winn (2010)
        k = r / R
        arg = tt.square(1 + k) - tt.square(self.b)
        factor = R / (self.a * self.sin_incl)
        hdur = hp * tt.arcsin(factor * tt.sqrt(arg)) / np.pi
        t_start = -hdur
        t_end = hdur
        flag = z
    else:
        M_contact = self.contact_points_op(
            self.a, self.ecc, self.cos_omega, self.sin_omega,
            self.cos_incl + z, self.sin_incl + z, R + r)
        flag = M_contact[2]

        t_start = (M_contact[0] - self.M0) / self.n
        t_start = tt.mod(t_start + hp, self.period) - hp

        t_end = (M_contact[1] - self.M0) / self.n
        t_end = tt.mod(t_end + hp, self.period) - hp

        t_start = tt.switch(tt.gt(t_start, 0.0), t_start - self.period, t_start)
        t_end = tt.switch(tt.lt(t_end, 0.0), t_end + self.period, t_end)

    if texp is not None:
        t_start -= 0.5 * texp
        t_end += 0.5 * texp

    mask = tt.any(tt.and_(dt >= t_start, dt <= t_end), axis=-1)
    result = ifelse(tt.all(tt.eq(flag, 0)),
                    tt.arange(t.size)[mask],
                    tt.arange(t.size))

    return result
def logp(self, x):
    n = self.n
    p = self.p
    if x.ndim == 2:
        x_sum = x.sum(axis=0)
        n_sum = n * x.shape[0]
    else:
        x_sum = x
        n_sum = n
    return bound(
        factln(n_sum) + tt.sum(x_sum * tt.log(p) - factln(x_sum)),
        tt.all(x >= 0),
        tt.all(x <= n),
        tt.eq(tt.sum(x_sum), n_sum),
        tt.isclose(p.sum(), 1),
        n >= 0)
def __init__(self, a, transform=transforms.stick_breaking,
             *args, **kwargs):
    shape = a.shape[0]
    kwargs.setdefault("shape", shape)
    super(Dirichlet, self).__init__(transform=transform, *args, **kwargs)

    self.k = shape
    self.a = a
    self.mean = a / T.sum(a)

    self.mode = T.switch(T.all(a > 1),
                         (a - 1) / T.sum(a - 1),
                         np.nan)
def logp(self, x):
    # x is assumed to be (s x n_elem) if s > 1, otherwise a flat n_elem vector
    n = self.n
    p = self.p
    s = self.s
    if s != 1:
        X = self._X_inner_creation(x)
        result = self._results_inner(n, p, s, x)
        return pm.dist_math.bound(result,
                                  tt.all(X <= 1), tt.all(X >= -1),
                                  n > 0)
    else:
        X = x[self.tri_index]
        X = tt.fill_diagonal(X, 1)
        result = self._normalizing_constant(n, p)
        result += (n - 1.) * tt.log(tt.nlinalg.det(X))
        return pm.dist_math.bound(result,
                                  tt.all(X <= 1), tt.all(X >= -1),
                                  n > 0)
def test_jax_logp():
    mu = tt.vector("mu")
    mu.tag.test_value = np.r_[0.0, 0.0].astype(tt.config.floatX)
    tau = tt.vector("tau")
    tau.tag.test_value = np.r_[1.0, 1.0].astype(tt.config.floatX)
    sigma = tt.vector("sigma")
    sigma.tag.test_value = (1.0 / get_test_value(tau)).astype(tt.config.floatX)
    value = tt.vector("value")
    value.tag.test_value = np.r_[0.1, -10].astype(tt.config.floatX)

    logp = (-tau * (value - mu) ** 2 + tt.log(tau / np.pi / 2.0)) / 2.0
    conditions = [sigma > 0]
    alltrue = tt.all([tt.all(1 * val) for val in conditions])
    normal_logp = tt.switch(alltrue, logp, -np.inf)

    fgraph = theano.gof.FunctionGraph([mu, tau, sigma, value], [normal_logp])

    _ = compare_jax_and_py(fgraph, [get_test_value(i) for i in fgraph.inputs])
def __init__(self, a, transform=transforms.stick_breaking,
             *args, **kwargs):
    self.k = shape = a.shape[0]
    if "shape" not in kwargs.keys():
        kwargs.update({"shape": shape})
    super(Dirichlet, self).__init__(transform=transform, *args, **kwargs)
    self.a = a
    self.mean = a / sum(a)

    self.mode = switch(all(a > 1),
                       (a - 1) / sum(a - 1),
                       nan)
def get_sentence_embeddings_function(hidden_states):
    sentence_embedding, _ = theano.scan(fn=inner_loop,
                                        sequences=hidden_states)
    sentence_embedding = ifelse(
        T.all(T.eq(sentence_embedding[-1],
                   T.zeros_like(sentence_embedding[-1]))),
        sentence_embedding[-2],
        sentence_embedding[-1])
    return sentence_embedding
def logp(self, X):
    n = self.n
    p = self.p
    V = self.V

    IVI = det(V)
    IXI = det(X)

    return bound(
        ((n - p - 1) * log(IXI)
         - trace(matrix_inverse(V).dot(X))
         - n * p * log(2) - n * log(IVI)
         - 2 * multigammaln(n / 2., p)) / 2,
        gt(n, (p - 1)),
        all(gt(eigh(X)[0], 0)),
        eq(X, X.T))
def logp(self, X):
    n = self.n
    p = self.p
    V = self.V

    IVI = det(V)
    IXI = det(X)

    return bound(
        ((n - p - 1) * T.log(IXI)
         - trace(matrix_inverse(V).dot(X))
         - n * p * T.log(2) - n * T.log(IVI)
         - 2 * multigammaln(n / 2., p)) / 2,
        T.all(eigh(X)[0] > 0),
        T.eq(X, X.T),
        n > (p - 1))
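# For reference, the expression above is the Wishart log-density
#   ln p(X | V, n) = [ (n - p - 1) ln|X| - tr(V^{-1} X)
#                      - n p ln 2 - n ln|V| - 2 ln Gamma_p(n / 2) ] / 2,
# with the bound conditions restricting support to symmetric positive
# definite X and degrees of freedom n > p - 1.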
def __init__(self, a, transform=transforms.stick_breaking,
             *args, **kwargs):
    shape = a.shape[-1]
    kwargs.setdefault("shape", shape)
    super(Dirichlet, self).__init__(transform=transform, *args, **kwargs)

    self.k = tt.as_tensor_variable(shape)
    self.a = a = tt.as_tensor_variable(a)
    self.mean = a / tt.sum(a)

    self.mode = tt.switch(tt.all(a > 1),
                          (a - 1) / tt.sum(a - 1),
                          np.nan)
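# NumPy sketch of the guarded moments above: the Dirichlet mode
# (a - 1) / sum(a - 1) only exists when every concentration exceeds 1,
# which is exactly what tt.switch(tt.all(a > 1), ..., np.nan) encodes.
import numpy as np

a = np.array([2.0, 3.0, 5.0])
mean = a / a.sum()
mode = (a - 1) / (a - 1).sum() if np.all(a > 1) else np.nan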
def _quaddist_tau(self, delta):
    chol_tau = self.chol_tau
    _, k = delta.shape
    k = pm.floatX(k)

    diag = tt.nlinalg.diag(chol_tau)
    ok = tt.all(diag > 0)

    chol_tau = tt.switch(ok, chol_tau, 1)
    diag = tt.nlinalg.diag(chol_tau)
    delta_trans = tt.dot(delta, chol_tau)
    quaddist = (delta_trans ** 2).sum(axis=-1)
    logdet = -tt.sum(tt.log(diag))
    return quaddist, logdet, ok
def _quaddist_chol(self, delta):
    chol_cov = self.chol_cov
    _, k = delta.shape
    k = pm.floatX(k)
    diag = tt.nlinalg.diag(chol_cov)
    # Check if the covariance matrix is positive definite.
    ok = tt.all(diag > 0)
    # If not, replace the diagonal. We return -inf later, but
    # need to prevent solve_lower from throwing an exception.
    chol_cov = tt.switch(ok, chol_cov, 1)

    delta_trans = self.solve_lower(chol_cov, delta.T).T
    quaddist = (delta_trans ** 2).sum(axis=-1)
    logdet = tt.sum(tt.log(diag))
    return quaddist, logdet, ok
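# Minimal sketch of the positive-definiteness guard above, outside any class
# (names are illustrative): when the Cholesky factor has a non-positive
# diagonal entry, swap in a harmless dummy so downstream ops cannot fail,
# and report ok = False so the caller can return -inf.
import numpy as np
import theano
import theano.tensor as tt

chol = tt.dmatrix("chol")
diag = tt.nlinalg.diag(chol)
ok = tt.all(diag > 0)
safe_chol = tt.switch(ok, chol, 1)                  # dummy factor when not ok
logdet = tt.sum(tt.log(tt.nlinalg.diag(safe_chol)))
logp_term = tt.switch(ok, logdet, -np.inf)

f = theano.function([chol], [ok, logp_term])
# f(np.array([[1., 0.], [0.5, 2.]]))   -> ok = 1, logdet = log 2
# f(np.array([[1., 0.], [0.5, -2.]]))  -> ok = 0, -inf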
def logp(self, X):
    n = self.n
    p = self.p
    V = self.V

    IVI = det(V)
    IXI = det(X)

    return bound(
        ((n - p - 1) * log(IXI)
         - trace(matrix_inverse(V).dot(X))
         - n * p * log(2) - n * log(IVI)
         - 2 * multigammaln(n / 2., p)) / 2,
        gt(n, (p - 1)),
        all(gt(eigh(X)[0], 0)),
        eq(X, X.T))
def apply(self, source, source_mask, source_x, attention):
    if source.ndim != 3 or attention.ndim != 2:
        raise NotImplementedError

    align_matrix = T.tanh(source_x + T.dot(attention, self.Wa)[None, :, :])
    align = theano.dot(align_matrix, self.v)
    align = T.exp(align - align.max(axis=0, keepdims=True))

    if source_mask:
        align = align * source_mask
        normalization = align.sum(axis=0) + T.all(1 - source_mask, axis=0)
    else:
        normalization = align.sum(axis=0)
    align = align / normalization

    self.output = (T.shape_padright(align) * source).sum(axis=0)

    return self.output
def function(inputs, outputs=None, check_valid=False, checks=(), **kwargs):
    input_names = None
    output_names = None
    if isinstance(inputs, dict):
        if inputs:
            (input_names, inputs) = zip(*inputs.iteritems())
        else:
            (input_names, inputs) = ((), ())
    if isinstance(outputs, dict):
        if outputs:
            (output_names, outputs) = zip(*outputs.iteritems())
        else:
            (output_names, outputs) = ((), ())

    if check_valid or checks:
        updates = kwargs.setdefault('updates', {})
        asserts = [assert_(c, 'check failed: %s' % c) for c in checks]
        if check_valid:
            if outputs:
                if not isinstance(outputs, (list, tuple)):
                    outputs = [outputs]
                asserts += (assert_(isvalid(x), 'output invalid: %d (%s)' % (i, x.name))
                            for (i, x) in enumerate(outputs))
            if updates:
                asserts += (assert_(isvalid(xnew), 'update invalid: variable %s' % str(x))
                            for (x, xnew) in updates.iteritems())
        checks_passed = theano.shared(np.int8(1), name='checks_passed')
        updates[checks_passed] = \
            T.all(T.as_tensor_variable(asserts)).astype('int8')
        f = _CheckedFunction(inputs, outputs, **kwargs)
    else:
        f = theano.function(inputs, outputs, **kwargs)

    if hasattr(f.fn, 'clear_storage'):
        f.clear_storage = f.fn.clear_storage
    else:
        _log.warn('Function %s has no clear_storage: disabling', f.fn)
        f.clear_storage = lambda: None

    if input_names is not None or output_names is not None:
        return NamedInputOutputFunction(input_names, output_names, f)
    return f
def grad(self, inputs, gradients):
    """
    Cholesky decomposition reverse-mode gradient update.

    Symbolic expression for reverse-mode Cholesky gradient taken from [0]_

    References
    ----------
    .. [0] I. Murray, "Differentiation of the Cholesky decomposition",
       http://arxiv.org/abs/1602.07527

    """
    x = inputs[0]
    dz = gradients[0]
    chol_x = self(x)
    ok = tt.all(tt.nlinalg.diag(chol_x) > 0)
    chol_x = tt.switch(ok, chol_x, tt.fill_diagonal(chol_x, 1))
    dz = tt.switch(ok, dz, floatX(1))

    # deal with upper triangular by converting to lower triangular
    if not self.lower:
        chol_x = chol_x.T
        dz = dz.T

    def tril_and_halve_diagonal(mtx):
        """Extracts lower triangle of square matrix and halves diagonal."""
        return tt.tril(mtx) - tt.diag(tt.diagonal(mtx) / 2.)

    def conjugate_solve_triangular(outer, inner):
        """Computes L^{-T} P L^{-1} for lower-triangular L."""
        solve = tt.slinalg.Solve(A_structure="upper_triangular")
        return solve(outer.T, solve(outer.T, inner.T).T)

    s = conjugate_solve_triangular(
        chol_x, tril_and_halve_diagonal(chol_x.T.dot(dz)))

    if self.lower:
        grad = tt.tril(s + s.T) - tt.diag(tt.diagonal(s))
    else:
        grad = tt.triu(s + s.T) - tt.diag(tt.diagonal(s))
    return [tt.switch(ok, grad, floatX(np.nan))]
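# One way to sanity-check this kind of gradient (assumption: the custom op
# behaves like Theano's built-in slinalg.cholesky on positive definite
# inputs): compare the symbolic gradient with finite differences on a
# well-conditioned SPD matrix.
import numpy as np
import theano
import theano.tensor.slinalg as tsl

rng = np.random.RandomState(42)
A = rng.randn(4, 4)
spd = A.dot(A.T) + 4 * np.eye(4)     # random SPD test point

theano.gradient.verify_grad(lambda m: tsl.cholesky(m).sum(), [spd], rng=rng)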
def scan_step(pt_pre, h_pre, k_pre, w_pre, mask, seq_str, seq_str_mask, bias):
    h, a, k, p, w = self.pos_layer.step(
        pt_pre, h_pre, k_pre, w_pre, seq_str, seq_str_mask, mask=mask)
    h_conc = T.concatenate([h, w], axis=-1)
    pt = self.mixture.prediction(h_conc, bias)

    # ending condition
    last_char = T.cast(T.sum(seq_str_mask, axis=0) - 1, 'int32')
    last_phi = p[last_char, T.arange(last_char.shape[0])]
    max_phi = T.max(p, axis=0)
    condition = last_phi >= 0.95 * max_phi
    mask = T.switch(condition, .0, mask)

    return ((pt, h, a, k, p, w, mask),
            theano.scan_module.until(T.all(mask < 1.)))