def __theano_shorttermError(self, targetM, i, x):
    # Pull error: sum of Mahalanobis distances between target neighbors.
    ivectors = x[self.Tneighbor[0]]
    jvectors = x[self.Tneighbor[1]]
    diffv = ivectors - jvectors
    Dneighbor = linalg.diag(diffv.dot(targetM).dot(diffv.T))
    pull_error = T.sum(Dneighbor)

    # Push error: hinge loss on triplets (i, j, l) where l has a different label.
    ivectors = x[self.Ttriple[0]]
    jvectors = x[self.Ttriple[1]]
    lvectors = x[self.Ttriple[2]]
    diffij = ivectors - jvectors
    diffil = ivectors - lvectors
    lossij = diffij.dot(targetM).dot(diffij.T)
    lossil = diffil.dot(targetM).dot(diffil.T)
    mask = T.neq(self.Ty[self.Ttriple[0]], self.Ty[self.Ttriple[2]])
    Dtripleij = linalg.diag(lossij)
    Dtripleil = linalg.diag(lossil)
    push_error = (mask * T.maximum(Dtripleij - Dtripleil + 1, 0)).sum()
    self.nonzerocount = (mask * linalg.diag(T.gt(lossij - lossil + 1, 0))).sum()
    # Optional time-step weighting:
    # pull_error = pull_error * np.sqrt((i + 1.0) / self.M)
    # push_error = push_error * np.sqrt((i + 1.0) / self.M)
    return pull_error, push_error, Dneighbor, Dtripleij, Dtripleil
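A note on the linalg.diag(diffv.dot(targetM).dot(diffv.T)) pattern above: it materializes a full n x n product just to read off its diagonal. A minimal NumPy sketch (function name hypothetical) of an equivalent row-wise quadratic form that avoids the n x n intermediate:

import numpy as np

def rowwise_mahalanobis(diffv, M):
    # diag(D @ M @ D.T) without forming the (n, n) product:
    # entry i is the quadratic form d_i^T M d_i for row d_i.
    return np.einsum('ij,jk,ik->i', diffv, M, diffv)

# sanity check against the explicit diagonal
rng = np.random.default_rng(0)
D = rng.standard_normal((5, 3))
M = np.eye(3)
assert np.allclose(rowwise_mahalanobis(D, M), np.diag(D @ M @ D.T))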
def diag(self, inputs, outputs, noise=False):
    # Scale outputs by the pointwise standard deviation of the
    # (optionally noisy) kernel covariance.
    if noise:
        cho = tt.sqrt(tnl.diag(self.noisy.cov(inputs)))
    else:
        cho = tt.sqrt(tnl.diag(self.kernel.cov(inputs)))
    return cho * outputs
def get_cost(self):
    # Max-margin ranking loss: positive examples should score at least
    # one unit higher than negative examples against the query.
    query_matrix = sparse.basic.dot(self.query, self.W_q)
    query_matrix = query_matrix.dimshuffle(1, 0)
    plus_matrix = T.dot(T.dot(self.plus, self.W_x), query_matrix)
    plus_vector = diag(plus_matrix)
    minus_matrix = T.dot(T.dot(self.minus, self.W_x), query_matrix)
    minus_vector = diag(minus_matrix)
    cost_vector = T.maximum(0.0, 1 - plus_vector + minus_vector)
    return T.mean(cost_vector)
def __init__(self, dim, name=None):
    self.name = name
    self.dim = dim
    self.means = utilsT.sharedf(np.zeros(dim))
    self.vars = utilsT.sharedf(np.ones(dim))
    self.varmat = tlin.diag(self.vars)
    self.rmat = tlin.diag(T.sqrt(self.vars))
    self.means_ = self.means.dimshuffle(['x', 0])
    self.qzft = mathT.multiNormInit_sharedParams(self.means, self.varmat, self.dim)
    self.qzfn = None
    self.params = [self.means, self.vars]
def call(self, X, training=None):
    X0 = K.dot(X, self.U)
    if training in {0, False}:
        return X0
    n = K.shape(X)[0]
    # Running estimate of the covariance matrix, regularized by r.
    C = K.dot(K.transpose(X), X) / K.cast(n - 1, 'float32')
    self.C = self.momentum * self.C + (1 - self.momentum) * C
    C = C + self.r * eye_like(C)
    D, V = eigh(C)
    # Keep only eigenpairs with eigenvalues above eps to increase stability.
    if BACKEND == 'theano':
        posInd = K.greater(D, eps).nonzero()[0]
        D = D[posInd]
        V = V[:, posInd]
    else:
        posBool = K.greater(D, eps)
        D = tf.boolean_mask(D, posBool)
        V = tf.boolean_mask(V, posBool, axis=1)
    # ZCA whitening transform: U = V diag(1/sqrt(D)) V^T.
    U = K.dot(K.dot(V, diag(reciprocal(K.sqrt(D)))), K.transpose(V))
    U = K.transpose(U)
    self.add_update([(self.U, U)], X)
    X_updated = K.dot(X, U)
    return K.in_train_phase(X_updated, X0, training=training)
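The eigendecomposition branch above builds the ZCA-whitening transform U = V diag(1/sqrt(D)) V^T from the regularized covariance. A minimal NumPy sketch of the same computation, assuming uncentered data and reusing the r/eps names from the layer:

import numpy as np

def zca_matrix(X, r=1e-5, eps=1e-7):
    # Regularized covariance, as in the layer above.
    n = X.shape[0]
    C = X.T @ X / (n - 1) + r * np.eye(X.shape[1])
    D, V = np.linalg.eigh(C)
    keep = D > eps                  # drop near-zero eigenvalues for stability
    D, V = D[keep], V[:, keep]
    # U = V diag(1/sqrt(D)) V^T whitens: cov(X @ U) ~ I
    return V @ np.diag(1.0 / np.sqrt(D)) @ V.T

rng = np.random.default_rng(1)
X = rng.standard_normal((1000, 4)) @ rng.standard_normal((4, 4))
U = zca_matrix(X)
Cw = (X @ U).T @ (X @ U) / (X.shape[0] - 1)
assert np.allclose(Cw, np.eye(4), atol=1e-3)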
def __init__(self):
    super(banana, self).__init__()
    self.priormus = utils.sharedf(np.zeros(2))
    self.priorvar = utils.sharedf(np.eye(2))
    self.stdn = utils.sharedf([.5, .5])
    self.varn = nlinalg.diag(T.sqr(self.stdn))
    self.logPz = mathT.gaussInit(self.priormus, self.priorvar)
def logp_cho(cls, value, mu, cho, mapping):
    """
    Calculates the log-probability of the parameters given the data.

    :param value: the data
    :param mu: the location (obtained from the hyperparameters)
    :param cho: the Cholesky decomposition of the dispersion matrix
    :param mapping: the mapping of the warping
    :return: the log-probability of the parameters given the data (value)
    """
    delta = mapping.inv(value) - mu
    lcho = tsl.solve_lower_triangular(cho, delta)
    lcho2 = lcho.T.dot(lcho)
    # Gaussian log-density via the Cholesky factor:
    # -n/2 log(2*pi) - 0.5 * delta^T K^{-1} delta - sum(log diag(cho)),
    # plus the log-Jacobian of the inverse warping.
    npi = np.float32(-0.5) * cho.shape[0].astype(th.config.floatX) * tt.log(np.float32(2.0 * np.pi))
    dot2 = np.float32(-0.5) * lcho2
    det_k = -tt.sum(tt.log(tnl.diag(cho)))
    det_m = mapping.logdet_dinv(value)
    r = npi + dot2 + det_k + det_m
    # Guard against numerical failures: return a large negative value
    # whenever any intermediate contains inf or nan.
    cond1 = tt.or_(tt.any(tt.isinf_(delta)), tt.any(tt.isnan_(delta)))
    cond2 = tt.or_(tt.any(tt.isinf_(det_m)), tt.any(tt.isnan_(det_m)))
    cond3 = tt.or_(tt.any(tt.isinf_(cho)), tt.any(tt.isnan_(cho)))
    cond4 = tt.or_(tt.any(tt.isinf_(lcho)), tt.any(tt.isnan_(lcho)))
    return ifelse(cond1, np.float32(-1e30),
                  ifelse(cond2, np.float32(-1e30),
                         ifelse(cond3, np.float32(-1e30),
                                ifelse(cond4, np.float32(-1e30), r))))
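The r = npi + dot2 + det_k terms above assemble the standard Cholesky form of the multivariate normal log-density. A quick NumPy/SciPy check of that identity, independent of the warping term:

import numpy as np
from scipy.linalg import solve_triangular
from scipy.stats import multivariate_normal

def logp_chol(x, mu, L):
    # log N(x | mu, L L^T) using only the Cholesky factor:
    # -n/2 log(2*pi) - sum(log diag(L)) - 0.5 * ||L^{-1} (x - mu)||^2
    z = solve_triangular(L, x - mu, lower=True)
    n = len(x)
    return -0.5 * n * np.log(2 * np.pi) - np.sum(np.log(np.diag(L))) - 0.5 * z @ z

rng = np.random.default_rng(2)
A = rng.standard_normal((3, 3))
cov = A @ A.T + 3 * np.eye(3)
L = np.linalg.cholesky(cov)
x, mu = rng.standard_normal(3), np.zeros(3)
assert np.isclose(logp_chol(x, mu, L), multivariate_normal(mu, cov).logpdf(x))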
def __init__(self, X, a_alpha=1e-3, b_alpha=1e-3, a_tau=1e-3, b_tau=1e-3, beta=1e-3):
    # Data: N samples of dimension d; latent dimension q = d - 1.
    self.X = X
    self.d = self.X.shape[1]
    self.N = self.X.shape[0]
    self.q = self.d - 1
    # Hyperparameters.
    self.a_alpha = a_alpha
    self.b_alpha = b_alpha
    self.a_tau = a_tau
    self.b_tau = b_tau
    self.beta = beta
    with pm.Model() as model:
        z = pm.MvNormal('z', mu=np.zeros(self.q), cov=np.eye(self.q),
                        shape=(self.N, self.q))
        mu = pm.MvNormal('mu', mu=np.zeros(self.d), cov=np.eye(self.d) / self.beta,
                         shape=self.d)
        alpha = pm.Gamma('alpha', alpha=self.a_alpha, beta=self.b_alpha, shape=self.q)
        w = pm.MatrixNormal('w', mu=np.zeros((self.d, self.q)), rowcov=np.eye(self.d),
                            colcov=diag(1 / alpha), shape=(self.d, self.q))
        tau = pm.Gamma('tau', alpha=self.a_tau, beta=self.b_tau)
        x = pm.math.dot(z, w.T) + mu
        obs_x = pm.MatrixNormal('obs_x', mu=x, rowcov=np.eye(self.N),
                                colcov=np.eye(self.d) / tau, shape=(self.N, self.d),
                                observed=self.X)
    self.model = model
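A hedged usage sketch for the model above; `BayesianPCA` is a hypothetical name for the class that owns this __init__, and the inference calls are standard PyMC3 API:

import numpy as np
import pymc3 as pm

X = np.random.randn(200, 5)
bppca = BayesianPCA(X)  # hypothetical class name for the __init__ above
with bppca.model:
    # NUTS sampling ...
    trace = pm.sample(1000, tune=1000)
    # ... or variational inference for larger problems
    approx = pm.fit(n=20000, method=pm.ADVI())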
def grad(self, inp, cost_grad):
    """
    Notes
    -----
    The gradient is currently implemented for matrices only.
    """
    a, val = inp
    grad = cost_grad[0]
    if a.dtype.startswith("complex"):
        return [None, None]
    elif a.ndim > 2:
        raise NotImplementedError("%s: gradient is currently implemented"
                                  " for matrices only" % self.__class__.__name__)
    wr_a = fill_diagonal(grad, 0)  # valid for any number of dimensions
    # diag is only valid for matrices
    wr_val = nlinalg.diag(grad).sum()
    return [wr_a, wr_val]
def test_diag(self):
    # test that it builds a matrix with the given diagonal when using
    # vector inputs
    x = theano.tensor.vector()
    y = diag(x)
    assert y.owner.op.__class__ == AllocDiag

    # test that it extracts the diagonal when using matrix input
    x = theano.tensor.matrix()
    y = extract_diag(x)
    assert y.owner.op.__class__ == ExtractDiag

    # other types should raise an error
    x = theano.tensor.tensor3()
    ok = False
    try:
        y = extract_diag(x)
    except TypeError:
        ok = True
    assert ok
def setup_model(self, data):
    with pm.Model() as model:
        # Diagonal latent transition: each latent dimension evolves independently.
        self.transmat_ = pm.Normal('Tmat', mu=1, sd=1, shape=(self.latent_dimension))
        self.hidden_states.append(
            pm.Normal('H0', mu=0, sd=1,
                      shape=(self.sample_minibatch, self.latent_dimension),
                      testval=np.random.randn(self.sample_minibatch,
                                              self.latent_dimension)))
        for i in range(1, self.num_time_steps):
            self.hidden_states.append(
                th.dot(self.hidden_states[-1], diag(self.transmat_)))
        F = pm.Normal('F', mu=0, sd=1,
                      shape=(self.latent_dimension, self.observ_dimension),
                      testval=np.random.randn(self.latent_dimension,
                                              self.observ_dimension))
        for i in range(self.num_time_steps):
            self.observed_states.append(
                pm.Normal('X_{}'.format(i), mu=th.dot(self.hidden_states[i], F),
                          sd=1, shape=(self.sample_minibatch, self.observ_dimension),
                          observed=data[i]))
        approx = pm.fit(n=45000, method=pm.ADVI())
        trace = approx.sample(500)
    import pickle
    with open('pick.dump2.pkl', 'wb') as buff:
        pickle.dump({'model': model, 'approx': approx, 'trace': trace}, buff)
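Since diag(self.transmat_) makes the transition matrix diagonal, h @ diag(a) equals the elementwise product h * a. A minimal NumPy sketch of the generative process the model above encodes (names assumed):

import numpy as np

def simulate(a, F, h0, num_steps, rng):
    # h_t = h_{t-1} @ diag(a) == h_{t-1} * a (elementwise scaling),
    # x_t = h_t @ F + unit Gaussian noise; x_0 is emitted from h_0.
    h, xs = h0, []
    for _ in range(num_steps):
        xs.append(h @ F + rng.standard_normal((h.shape[0], F.shape[1])))
        h = h * a
    return np.stack(xs)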
def logp_cho(cls, value, mu, cho, freedom, mapping):
    # Multivariate Student-t log-density evaluated via the Cholesky factor,
    # plus the log-Jacobian of the inverse warping.
    delta = mapping.inv(value) - mu
    lcho = tsl.solve_lower_triangular(cho, delta)
    beta = lcho.T.dot(lcho)
    n = cho.shape[0].astype(th.config.floatX)
    np5 = np.float32(0.5)
    np2 = np.float32(2.0)
    npi = np.float32(np.pi)
    r1 = -np5 * (freedom + n) * tt.log1p(beta / (freedom - np2))
    # For very large degrees of freedom, fall back to the Gaussian constant.
    r2 = ifelse(tt.le(np.float32(1e6), freedom),
                -n * np5 * np.log(np2 * npi),
                tt.gammaln((freedom + n) * np5) - tt.gammaln(freedom * np5)
                - np5 * n * tt.log((freedom - np2) * npi))
    r3 = -tt.sum(tt.log(tnl.diag(cho)))
    det_m = mapping.logdet_dinv(value)
    r1 = debug(r1, name='r1', force=True)
    r2 = debug(r2, name='r2', force=True)
    r3 = debug(r3, name='r3', force=True)
    det_m = debug(det_m, name='det_m', force=True)
    r = r1 + r2 + r3 + det_m
    # Guard against numerical failures, as in the Gaussian case above.
    cond1 = tt.or_(tt.any(tt.isinf_(delta)), tt.any(tt.isnan_(delta)))
    cond2 = tt.or_(tt.any(tt.isinf_(det_m)), tt.any(tt.isnan_(det_m)))
    cond3 = tt.or_(tt.any(tt.isinf_(cho)), tt.any(tt.isnan_(cho)))
    cond4 = tt.or_(tt.any(tt.isinf_(lcho)), tt.any(tt.isnan_(lcho)))
    return ifelse(cond1, np.float32(-1e30),
                  ifelse(cond2, np.float32(-1e30),
                         ifelse(cond3, np.float32(-1e30),
                                ifelse(cond4, np.float32(-1e30), r))))
def diag(x):
    return nla.diag(x)
def th_covariance(self, prior=False, noise=False):
    return tnl.diag(self.f_density.th_variance(self.th_space))
def predict_score_batch(self, query, image):
    query_matrix = sparse.basic.dot(query, self.W_q)
    query_matrix = query_matrix.dimshuffle(1, 0)
    img_matrix = T.dot(T.dot(image, self.W_x), query_matrix)
    return diag(img_matrix)
def logdet_dinv(self, inputs, outputs):
    cho = cholesky_robust(self.noisy.cov(inputs))
    return -tt.sum(tt.log(tnl.diag(cho)))
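For cho the lower Cholesky factor of K = cho @ cho.T, log det K = 2 * sum(log(diag(cho))), so the value returned above equals -0.5 times the log-determinant of the noisy covariance. A quick NumPy check of the identity:

import numpy as np

rng = np.random.default_rng(3)
A = rng.standard_normal((4, 4))
K = A @ A.T + 4 * np.eye(4)
L = np.linalg.cholesky(K)
# log det K = 2 * sum(log diag(L)), so -sum(log diag(L)) = -0.5 * log det K
assert np.isclose(-np.sum(np.log(np.diag(L))), -0.5 * np.linalg.slogdet(K)[1])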
def predict_score_batch(self, query, image):
    score_matrix = T.dot(T.dot(query, self.W_x), image.dimshuffle(1, 0))
    return diag(score_matrix)
def fn1(x, y, trainx, trainy):
    diff = x - trainx
    cost = linalg.diag(diff.dot(A).dot(diff.T)) * T.eq(y, trainy)
    return cost