def moments(self):
    """Calculate covariance and correlation matrices, trait, genotypic and ontogenetic means"""
    zs = np.array([ind["z"] for ind in self.pop])
    xs = np.array([ind["x"] for ind in self.pop])
    ys = np.array([ind["y"] for ind in self.pop])
    bs = np.array([ind["b"] for ind in self.pop])
    zmean = zs.mean(axis=0)
    xmean = xs.mean(axis=0)
    ymean = ys.mean(axis=0)
    bmean = bs.mean(axis=0)
    phenotypic = np.cov(zs, rowvar=0, bias=1)
    genetic = np.cov(xs, rowvar=0, bias=1)
    heritability = genetic[np.diag_indices_from(genetic)] / phenotypic[np.diag_indices_from(phenotypic)]
    corr_phenotypic = np.corrcoef(zs, rowvar=0)
    corr_genetic = np.corrcoef(xs, rowvar=0)
    avgP = avg_ratio(corr_phenotypic, self.modules)
    avgG = avg_ratio(corr_genetic, self.modules)
    return {
        "y.mean": ymean,
        "b.mean": bmean,
        "z.mean": zmean,
        "x.mean": xmean,
        "P": phenotypic,
        "G": genetic,
        "h2": heritability,
        "avgP": avgP,
        "avgG": avgG,
        "corrP": corr_phenotypic,
        "corrG": corr_genetic,
    }
def Voigt_6x6_to_cubic(C):
    """
    Convert the Voigt 6x6 representation into the cubic elastic constants
    C11, C12 and C44.
    """
    tol = 1e-6
    C_check = np.zeros_like(C)
    C_check[np.diag_indices_from(C_check)] = C[np.diag_indices_from(C)]
    C_check[0:3, 0:3] = C[0:3, 0:3]
    if np.any(np.abs(C - C_check) > tol):
        raise ValueError('"C" does not have cubic symmetry.')

    C11s = np.array([C[0, 0], C[1, 1], C[2, 2]])
    C12s = np.array([C[1, 2], C[0, 2], C[0, 1]])
    C44s = np.array([C[3, 3], C[4, 4], C[5, 5]])

    C11 = np.mean(C11s)
    C12 = np.mean(C12s)
    C44 = np.mean(C44s)

    if np.any(np.abs(C11 - C11s) > tol) or np.any(np.abs(C12 - C12s) > tol) or \
       np.any(np.abs(C44 - C44s) > tol):
        raise ValueError('"C" does not have cubic symmetry.')

    return np.array([C11, C12, C44])
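# A minimal usage sketch for Voigt_6x6_to_cubic (assumption: numpy imported as
# np; the stiffness values are made up). A cubic Voigt matrix is assembled from
# known C11/C12/C44 and the function recovers the three constants.
import numpy as np

C11, C12, C44 = 250.0, 150.0, 100.0          # hypothetical values, e.g. in GPa
C = np.zeros((6, 6))
C[:3, :3] = C12                               # upper-left block: C12 off-diagonal
C[np.diag_indices(3)] = C11                   # ...with C11 on its diagonal
C[3, 3] = C[4, 4] = C[5, 5] = C44             # shear block diagonal: C44
print(Voigt_6x6_to_cubic(C))                  # expected: [250. 150. 100.]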
def transform_covars_grad(self, internal_grad):
    grad = np.empty((self.num_latent, self.get_covar_size()), dtype=np.float32)
    for j in range(self.num_latent):
        tmp = self._theano_transform_covars_grad(internal_grad[0, j], self.covars_cholesky[j])
        tmp[np.diag_indices_from(tmp)] *= self.covars_cholesky[j][np.diag_indices_from(tmp)]
        grad[j] = tmp[np.tril_indices_from(self.covars_cholesky[j])]
    return grad.flatten()
def _get_raw_covars(self):
    flattened_covars = np.empty([self.num_latent, self.get_covar_size()], dtype=np.float32)
    for i in xrange(self.num_latent):
        raw_covars = self.covars_cholesky[i].copy()
        raw_covars[np.diag_indices_from(raw_covars)] = np.log(raw_covars[np.diag_indices_from(raw_covars)])
        flattened_covars[i] = raw_covars[np.tril_indices_from(raw_covars)]
    return flattened_covars.flatten()
def test_cosine_distances():
    # Check the pairwise Cosine distances computation
    rng = np.random.RandomState(1337)
    x = np.abs(rng.rand(910))
    XA = np.vstack([x, x])
    D = cosine_distances(XA)
    assert_array_almost_equal(D, [[0., 0.], [0., 0.]])
    # check that all elements are in [0, 2]
    assert np.all(D >= 0.)
    assert np.all(D <= 2.)
    # check that diagonal elements are equal to 0
    assert_array_almost_equal(D[np.diag_indices_from(D)], [0., 0.])

    XB = np.vstack([x, -x])
    D2 = cosine_distances(XB)
    # check that all elements are in [0, 2]
    assert np.all(D2 >= 0.)
    assert np.all(D2 <= 2.)
    # check that diagonal elements are equal to 0 and non diagonal to 2
    assert_array_almost_equal(D2, [[0., 2.], [2., 0.]])

    # check large random matrix
    X = np.abs(rng.rand(1000, 5000))
    D = cosine_distances(X)
    # check that diagonal elements are equal to 0
    assert_array_almost_equal(D[np.diag_indices_from(D)], [0.] * D.shape[0])
    assert np.all(D >= 0.)
    assert np.all(D <= 2.)
def newCostFunction(self, xs, ys, test=False): xs = np.array(xs) ys = np.array(ys) s1 = xs.dot(ys.T).T s2 = ys.dot(xs.T).T s1 = np.maximum(0, 1 - np.diag(s1) + s1).T s2 = np.maximum(0, 1 - np.diag(s2) + s2).T s1[np.diag_indices_from(s1)] = 0 s2[np.diag_indices_from(s2)] = 0 ns1 = s1 ns2 = s2 cost = np.sum(s1)+np.sum(s2) if abs(cost - 2) < 1e-5: import pdb pdb.set_trace() if test: return cost s1t = s1 > 0 s2t = s2 > 0 tx1 = (ys[:,:,None].T - ys[:,:,None]).transpose([0,2,1])*s1t[:,:,None] ty1 = (xs[:,:,None].T - xs[:,:,None]).transpose([0,2,1])*s2t[:,:,None] tx2 = (ys * np.ones((len(xs),len(xs),xs[0].size))).transpose(1,0,2) * s2t[:,:,None] ty2 = (xs * np.ones((len(xs),len(xs),xs[0].size))).transpose(1,0,2) * s1t[:,:,None] tx3 = (s2t.T)[:,:,None]*ys ty3 = (s1t.T)[:,:,None]*xs xd = np.sum(tx1 - tx2 + tx3, 1) yd = np.sum(ty1 - ty2 + ty3, 1) #print 'xd norm: %.4f, yd norm: %.4f'%(np.linalg.norm(xd), np.linalg.norm(yd)) return cost, list(xd), list(yd)
def set_covars(self, raw_covars):
    raw_covars = raw_covars.reshape([self.num_latent, self.get_covar_size()])
    for j in xrange(self.num_latent):
        cholesky = np.zeros([self.num_dim, self.num_dim], dtype=np.float32)
        cholesky[np.tril_indices_from(cholesky)] = raw_covars[j]
        cholesky[np.diag_indices_from(cholesky)] = np.exp(cholesky[np.diag_indices_from(cholesky)])
        self.covars_cholesky[j] = cholesky
        self.covars[j] = mdot(self.covars_cholesky[j], self.covars_cholesky[j].T)
def _update(self):
    self.parameters = self.get_parameters()
    for k in range(self.num_comp):
        for j in range(self.num_process):
            temp = np.zeros((self.num_dim, self.num_dim))
            temp[np.tril_indices_from(temp)] = self.L_flatten[k, j, :].copy()
            temp[np.diag_indices_from(temp)] = np.exp(temp[np.diag_indices_from(temp)])
            # temp[np.diag_indices_from(temp)] = temp[np.diag_indices_from(temp)] ** 2
            self.L[k, j, :, :] = temp
            self.s[k, j] = mdot(self.L[k, j, :, :], self.L[k, j, :, :].T)
def update_covariance(self, j, Sj):
    Sj = Sj.copy()
    mm = min(Sj[np.diag_indices_from(Sj)])
    if mm < 0:
        Sj[np.diag_indices_from(Sj)] = Sj[np.diag_indices_from(Sj)] - 1.1 * mm
    for k in range(self.num_comp):
        self.s[k, j] = Sj.copy()
        self.L[k, j] = jitchol(Sj, 10)
        tmp = self.L[k, j].copy()
        tmp[np.diag_indices_from(tmp)] = np.log(tmp[np.diag_indices_from(tmp)])
        self.L_flatten[k, j] = tmp[np.tril_indices_from(tmp)]
    self._update()
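# Standalone sketch (plain numpy, not tied to the class above) of the
# parameterization that set_covars, _update and update_covariance share:
# a covariance matrix is stored as the lower-triangular entries of its
# Cholesky factor, with the diagonal kept in log space so any unconstrained
# vector maps back to a valid positive-definite matrix.
import numpy as np

def covariance_to_raw(S):
    """Covariance -> unconstrained vector (log-diagonal Cholesky, lower triangle)."""
    L = np.linalg.cholesky(S)
    L[np.diag_indices_from(L)] = np.log(L[np.diag_indices_from(L)])
    return L[np.tril_indices_from(L)]

def raw_to_covariance(raw, dim):
    """Unconstrained vector -> covariance (inverse of covariance_to_raw)."""
    L = np.zeros((dim, dim))
    L[np.tril_indices_from(L)] = raw
    L[np.diag_indices_from(L)] = np.exp(L[np.diag_indices_from(L)])
    return L.dot(L.T)

S = np.array([[2.0, 0.3], [0.3, 1.0]])
assert np.allclose(raw_to_covariance(covariance_to_raw(S), 2), S)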
def getNormDistFluct(self, coords): """Normalized distance fluctuation """ model = self.getModel() LOGGER.info('Number of chains: {0}, chains: {1}.' .format(len(list(set(coords.getChids()))), \ list(set(coords.getChids())))) try: #coords = coords.select('protein and name CA') coords = (coords._getCoords() if hasattr(coords, '_getCoords') else coords.getCoords()) except AttributeError: try: checkCoords(coords) except TypeError: raise TypeError('coords must be a Numpy array or an object ' 'with `getCoords` method') if not isinstance(model, NMA): LOGGER.info('Calculating new model') model = GNM('prot analysis') model.buildKirchhoff(coords) model.calcModes() linalg = importLA() n_atoms = model.numAtoms() n_modes = model.numModes() LOGGER.timeit('_ndf') from .analysis import calcCrossCorr from numpy import linalg as LA # <dRi, dRi>, <dRj, dRj> = 1 crossC = 2-2*calcCrossCorr(model) r_ij = np.zeros((n_atoms,n_atoms,3)) for i in range(n_atoms): for j in range(i+1,n_atoms): r_ij[i][j] = coords[j,:] - coords[i,:] r_ij[j][i] = r_ij[i][j] r_ij_n = LA.norm(r_ij, axis=2) #with np.errstate(divide='ignore'): r_ij_n[np.diag_indices_from(r_ij_n)] = 1e-5 # div by 0 crossC=abs(crossC) normdistfluct = np.divide(np.sqrt(crossC),r_ij_n) LOGGER.report('NDF calculated in %.2lfs.', label='_ndf') normdistfluct[np.diag_indices_from(normdistfluct)] = 0 # div by 0 return normdistfluct
def ExpandNode(fringe,node): col_sum = np.sum(node.attacked_cells,0) dict_sum = {} for i in range(8): if col_sum[0,i] == 8: continue dict_sum[i] = col_sum[0,i] sorted_sum = sorted(dict_sum.items(),key=operator.\ itemgetter(1),reverse=True) for i in range(len(sorted_sum)): col = sorted_sum[i][0] for row in range(8): if node.attacked_cells[row,col]: continue attacked_cells = copy.deepcopy(node.attacked_cells) attacked_cells[:,col] = 1 attacked_cells[row,:] = 1 k = row-col rows, cols = np.diag_indices_from(attacked_cells) if k < 0: rows,cols = rows[:k],cols[-k:] elif k > 0: rows,cols = rows[k:],cols[:-k] attacked_cells[rows,cols] = 1 attacked_cells = np.fliplr(attacked_cells) ncol = 7-col k = row-ncol rows, cols = np.diag_indices_from(attacked_cells) if k < 0: rows,cols = rows[:k],cols[-k:] elif k > 0: rows,cols = rows[k:],cols[:-k] attacked_cells[rows,cols] = 1 attacked_cells = np.fliplr(attacked_cells) valid = True for i in range(node.depth+1,8): if np.sum(attacked_cells[i,:]) == 8: valid = False break if not valid: continue nstate = copy.deepcopy(node.state) nstate[row,col] = 1 new_node = Node(parent=node,depth=node.depth\ +1,state=nstate,attacked_cells=attacked_cells) fringe.insert(0,new_node)
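# Small illustration (plain numpy, independent of the Node class above) of the
# offset-diagonal trick used in ExpandNode: shift the index arrays returned by
# np.diag_indices_from to mark the diagonal passing through a given cell.
import numpy as np

board = np.zeros((8, 8), dtype=int)
row, col = 2, 5
k = row - col
rows, cols = np.diag_indices_from(board)
if k < 0:                        # cell lies above the main diagonal
    rows, cols = rows[:k], cols[-k:]
elif k > 0:                      # cell lies below the main diagonal
    rows, cols = rows[k:], cols[:-k]
board[rows, cols] = 1            # marks the whole diagonal through (row, col)
assert board[row, col] == 1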
def problem8():
    "problem set 2.1, problem 8, page 56"
    import LUdecomp
    A = np.array([[-3, 6, -4], [9, -8, 24], [-12, 24, -26]], dtype=float)
    A_orig = A.copy()
    LU = LUdecomp.LUdecomp(A)
    b = np.array([-3, 65, -42], dtype=float)
    b_orig = b.copy()
    x = LUdecomp.LUsolve(LU, b)
    # extract L and U for verification
    U = np.triu(LU)
    L = np.tril(LU)
    L[np.diag_indices_from(L)] = 1.0  # LUdecomp stores a unit lower-triangular L
    print("""
Problem 8:
A =
{}
LU decomposition A = LU, LU (in one matrix) =
{}
Solving Ax=b, with b = {}
Solution x = {}
Verifying solution:
residual ||Ax-b||_2 = {}
||A - dot(L,U)||_inf = {}
""".format(A_orig, LU, b_orig, x,
           la.norm(np.dot(A_orig, x) - b_orig, 2),
           la.norm(A_orig - np.dot(L, U), np.inf)))
def test_map_diag_and_offdiag(self): vars = ["x", "y", "z"] g = ag.PairGrid(self.df) g.map_offdiag(plt.scatter) g.map_diag(plt.hist) for ax in g.diag_axes: nt.assert_equal(len(ax.patches), 10) for i, j in zip(*np.triu_indices_from(g.axes, 1)): ax = g.axes[i, j] x_in = self.df[vars[j]] y_in = self.df[vars[i]] x_out, y_out = ax.collections[0].get_offsets().T npt.assert_array_equal(x_in, x_out) npt.assert_array_equal(y_in, y_out) for i, j in zip(*np.tril_indices_from(g.axes, -1)): ax = g.axes[i, j] x_in = self.df[vars[j]] y_in = self.df[vars[i]] x_out, y_out = ax.collections[0].get_offsets().T npt.assert_array_equal(x_in, x_out) npt.assert_array_equal(y_in, y_out) for i, j in zip(*np.diag_indices_from(g.axes)): ax = g.axes[i, j] nt.assert_equal(len(ax.collections), 0)
def ddiag(a, nozero=False):
    """ Robust diagonalization : always put selected diagonal on a diagonal!

    This small function aims at getting a behaviour closer to the
    mathematical "hat", compared to what np.diag() can deliver.

    If applied to a vector or a 2d-matrix with one dimension of size 1, put
    the coefficients on the diagonal of a matrix with off-diagonal elements
    equal to zero.

    If applied to a 2d-matrix (with all dimensions of size > 1), replace all
    off-diagonal elements by zeros.

    Parameters
    ----------
    a : numpy matrix or vector to be diagonalized

    Returns
    --------
    b : Diagonalized vector

    Raises:
       ValueError if a is more than 2-dimensional

    See Also
    --------
        diag
    """
    # If numpy vector
    if a.ndim == 1:
        b = np.diag(a)
    # If numpy 2d-array
    elif a.ndim == 2:
        # ...but with dimension of magnitude 1
        if min(a.shape) == 1:
            b = np.diag(np.squeeze(a))
        # ... or a "true" 2-d matrix
        else:
            b = np.diag(np.diag(a))
    else:
        raise ValueError("Input must be 1- or 2-d")

    # Extreme case: a 1 element matrix/vector
    if b.ndim == 1 and b.size == 1:
        b = b.reshape((1, 1))

    if nozero:
        # Replace off-diagonal zeros by nan if desired
        c = np.empty_like(b) * np.nan
        di = np.diag_indices_from(c)
        c[di] = b.diagonal()
        return c
    else:
        # A certainly diagonal vector is returned
        return b
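# Quick usage sketch for ddiag (assumes numpy as np and ddiag defined above):
# a vector is lifted onto a diagonal matrix, a full matrix is stripped to its
# diagonal, and nozero=True replaces the off-diagonal zeros with NaN.
import numpy as np

v = np.array([1.0, 2.0, 3.0])
print(ddiag(v))                  # 3x3 matrix with 1, 2, 3 on the diagonal

M = np.arange(9.0).reshape(3, 3)
print(ddiag(M))                  # off-diagonal entries zeroed out
print(ddiag(M, nozero=True))     # off-diagonal entries become NaN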
def report_clustering_dot_product(loci, thresholds_pack, method, feature_labels): thr_occ, thr_crisp, cluster_thresholds = thresholds_pack M = scores.generate_dot_product_score_matrix(feature_labels, method, loci=loci) M += np.transpose(M) M = -1 * np.log(M) M[np.diag_indices_from(M)] = 0 M[np.where(M==np.inf)] = 100 reports_dir_base = os.path.join(gv.project_data_path, 'cas4/reports/') cluster2summary_file_path = os.path.join(gv.project_data_path, 'cas4/reports/cluster_summary.tab') for threshold in cluster_thresholds: repors_dir = reports_dir_base + 'dot_%s_%d_%.2f_%.2f'%(method, thr_occ, thr_crisp, threshold) # print "Thresholds:", thr_occ, thr_crisp, threshold # print repors_dir # if os.path.exists(repors_dir): # sh.rmtree(repors_dir) # os.mkdir(repors_dir) singles, cluster_packs, entropies = dendrogram.classify_by_scores_cas4(M, threshold, loci) _local_thresholds_pack = (thr_occ, thr_crisp, threshold) generate_cluster_reports_cas4(cluster_packs, loci, repors_dir, feature_labels, method, _local_thresholds_pack) generate_cas4_gi_summary_file(singles, cluster_packs, loci, repors_dir, cluster2summary_file_path)
def test_pairplot(self): vars = ["x", "y", "z"] g = pairplot(self.df) for ax in g.diag_axes: nt.assert_equal(len(ax.patches), 10) for i, j in zip(*np.triu_indices_from(g.axes, 1)): ax = g.axes[i, j] x_in = self.df[vars[j]] y_in = self.df[vars[i]] x_out, y_out = ax.collections[0].get_offsets().T npt.assert_array_equal(x_in, x_out) npt.assert_array_equal(y_in, y_out) for i, j in zip(*np.tril_indices_from(g.axes, -1)): ax = g.axes[i, j] x_in = self.df[vars[j]] y_in = self.df[vars[i]] x_out, y_out = ax.collections[0].get_offsets().T npt.assert_array_equal(x_in, x_out) npt.assert_array_equal(y_in, y_out) for i, j in zip(*np.diag_indices_from(g.axes)): ax = g.axes[i, j] nt.assert_equal(len(ax.collections), 0) plt.close("all")
def _generate_noise(covar_matrix, time=1000, use_inverse=False):
    """
    Generate a multivariate normal distribution using correlated innovations.

    Parameters
    ----------
    covar_matrix : array
        Covariance matrix of the random variables
    time : int
        Sample size
    use_inverse : bool, optional
        Negate the off-diagonal elements and invert the covariance matrix
        before use

    Returns
    -------
    noise : array
        Random noise generated according to covar_matrix
    """
    # Pull out the number of nodes from the shape of the covar_matrix
    n_nodes = covar_matrix.shape[0]
    # Make a deep copy for use in the inverse case
    this_covar = covar_matrix
    # Take the negative inverse if needed
    if use_inverse:
        this_covar = copy.deepcopy(covar_matrix)
        this_covar *= -1
        this_covar[np.diag_indices_from(this_covar)] *= -1
        this_covar = np.linalg.inv(this_covar)
    # Return the noise distribution
    return np.random.multivariate_normal(mean=np.zeros(n_nodes),
                                         cov=this_covar,
                                         size=time)
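# Minimal usage sketch for _generate_noise (plain numpy; the covariance values
# are made up): draw 1000 correlated samples and check that the sample
# covariance roughly recovers the requested matrix.
import numpy as np

cov = np.array([[1.0, 0.6],
                [0.6, 1.0]])
noise = _generate_noise(cov, time=1000)
print(noise.shape)                  # (1000, 2)
print(np.cov(noise, rowvar=False))  # approximately equal to cov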
def compute_distances(self, x1, x2): """ The method imputes the missing values as means and calls safe_sparse_dot. Imputation simplifies computation at a cost of (theoretically) slightly wrong distance between pairs of missing values. """ def prepare_data(x): if self.discrete.any(): data = Cosine.discrete_to_indicators(x, self.discrete) else: data = x.copy() for col, mean in enumerate(self.means): column = data[:, col] column[np.isnan(column)] = mean if self.axis == 0: data = data.T data /= row_norms(data)[:, np.newaxis] return data data1 = prepare_data(x1) data2 = data1 if x2 is None else prepare_data(x2) dist = safe_sparse_dot(data1, data2.T) np.clip(dist, 0, 1, out=dist) if x2 is None: diag = np.diag_indices_from(dist) dist[diag] = np.where(np.isnan(dist[diag]), np.nan, 1.0) return 1 - dist
def _setup_pop_meas_ana(self, start_dens, end_dens, O_meas): H0_vecs = self.H0_vecs # cache locally self.info("O_measure:") self.pprint(O_meas) O_meas = dot(dot(H0_vecs.conj().T, O_meas), H0_vecs) self.pprint(O_meas) self.info("Start_dens:") self.pprint(start_dens) start_dens = dot(dot(H0_vecs.conj().T, start_dens), H0_vecs) self.pprint(start_dens) self.info("End_dens ({0}):".format(end_dens.dtype)) self.pprint(end_dens) end_dens = dot(dot(H0_vecs.conj().T, end_dens), H0_vecs) self.pprint(end_dens) self._pop_base = diag(end_dens) self._pop_diff = diag(start_dens) - self._pop_base self._pop_meas = diag(O_meas) m_start_d = dot(start_dens, O_meas).trace() m_end_d = dot(end_dens, O_meas).trace() # m_start_p = dot(pop_diff + pop_base, pop_meas) mod_start_dens = start_dens.copy() mod_start_dens[diag_indices_from(mod_start_dens)] = 0 m_diff = dot(mod_start_dens, O_meas).trace() # m_err = (m_start_p - m_start_d) / (m_start_d - m_end_d) m_err = m_diff / (m_start_d - m_end_d)
def cosine_distances(X, Y=None):
    """Compute cosine distance between samples in X and Y.

    Cosine distance is defined as 1.0 minus the cosine similarity.

    Read more in the :ref:`User Guide <metrics>`.

    Parameters
    ----------
    X : array_like, sparse matrix
        with shape (n_samples_X, n_features).

    Y : array_like, sparse matrix (optional)
        with shape (n_samples_Y, n_features).

    Returns
    -------
    distance matrix : array
        An array with shape (n_samples_X, n_samples_Y).

    See also
    --------
    sklearn.metrics.pairwise.cosine_similarity
    scipy.spatial.distance.cosine (dense matrices only)
    """
    # 1.0 - cosine_similarity(X, Y) without copy
    S = cosine_similarity(X, Y)
    S *= -1
    S += 1
    np.clip(S, 0, 2, out=S)
    if X is Y or Y is None:
        # Ensure that distances between vectors and themselves are set to 0.0.
        # This may not be the case due to floating point rounding errors.
        S[np.diag_indices_from(S)] = 0.0
    return S
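# Brief usage sketch, assuming the cosine_similarity used above is
# sklearn.metrics.pairwise.cosine_similarity: identical rows give distance 0,
# opposite rows give distance 2, and the diagonal is forced to exactly 0.
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity  # assumed dependency

X = np.array([[1.0, 0.0],
              [0.0, 1.0],
              [-1.0, 0.0]])
D = cosine_distances(X)
print(np.round(D, 3))   # D[0, 2] == 2 (opposite vectors), diagonal is 0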
def _add_relaxation(self, f_set, J0, J1, J2): H0_vecs = self.H0_vecs # cache locally J0ab = J0(self.w_diff) J1ab = J1(self.w_diff) J2ab = J2(self.w_diff) # pprint(J1ab) f2 = [] for A, Jq in zip(f_set, (J2ab, J1ab, J0ab, J1ab, J2ab)): A = dot(dot(H0_vecs.conj().T, A), H0_vecs) A *= A.conj() A *= Jq A = real_if_close(A) f2.append(A) f2 = array(f2) # pprint(f2) Rab = f2.sum(axis=0) diag_idx = diag_indices_from(Rab) Rab[diag_idx] = 0 assert allclose(Rab, Rab.T) Rab[diag_idx] = -Rab.sum(axis=1) self.info("Redfield matrix:") self.pprint(Rab) self.Rab_list.append(Rab)
def test_pairplot_reg(self): vars = ["x", "y", "z"] g = ag.pairplot(self.df, diag_kind="hist", kind="reg") for ax in g.diag_axes: nt.assert_equal(len(ax.patches), 10) for i, j in zip(*np.triu_indices_from(g.axes, 1)): ax = g.axes[i, j] x_in = self.df[vars[j]] y_in = self.df[vars[i]] x_out, y_out = ax.collections[0].get_offsets().T npt.assert_array_equal(x_in, x_out) npt.assert_array_equal(y_in, y_out) nt.assert_equal(len(ax.lines), 1) nt.assert_equal(len(ax.collections), 2) for i, j in zip(*np.tril_indices_from(g.axes, -1)): ax = g.axes[i, j] x_in = self.df[vars[j]] y_in = self.df[vars[i]] x_out, y_out = ax.collections[0].get_offsets().T npt.assert_array_equal(x_in, x_out) npt.assert_array_equal(y_in, y_out) nt.assert_equal(len(ax.lines), 1) nt.assert_equal(len(ax.collections), 2) for i, j in zip(*np.diag_indices_from(g.axes)): ax = g.axes[i, j] nt.assert_equal(len(ax.collections), 0)
def _sim_gp(tt, gp):
    """Simulates values from a sklearn GPR object """
    K = gp.kernel(tt[:, None])
    K[np.diag_indices_from(K)] += gp.alpha
    L = np.linalg.cholesky(K)
    return L.dot(np.random.normal(size=tt.size))
def diagonal_indices(a, offset=0):
    """The indices to the diagonal of a 2D array ``a``

    The indices are those to the main diagonal (if ``offset`` is 0), or to a
    secondary diagonal, having the specified offset from the main one.

    The array ``A`` does not need to be square.

    **Parameters:**

    a : ndarray
        The 2D ndarray for which the diagonal indices should be calculated.
    offset : int, optional
        The diagonal offset from the main one. Note that the sup-diagonal is
        at offset +1, the sub-diagonal at offset -1, and so on. Defaults to 0,
        which corresponds to the main diagonal.

    **Returns:**

    xs, ys : tuples
        The indices in the two coordinates. Thanks to ``numpy``'s advanced
        slicing, the diagonal may be accessed with ``A[(xs, ys)]``.
    """
    di, dj = np.diag_indices_from(a[:min(a.shape), :min(a.shape)])
    if offset > 0:
        di, dj = zip(*[(i, j) for i, j in zip(di, dj + offset)
                       if 0 <= j < a.shape[1]])
    elif offset == 0:
        pass
    else:
        di, dj = zip(*[(i, j) for i, j in zip(di - offset, dj)
                       if 0 <= i < a.shape[0]])
    return di, dj
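# Short usage sketch for diagonal_indices (plain numpy): fetch the main,
# super- and sub-diagonals of a non-square array.
import numpy as np

A = np.arange(12).reshape(3, 4)
print(A[diagonal_indices(A)])        # main diagonal: [0 5 10]
print(A[diagonal_indices(A, +1)])    # super-diagonal: [1 6 11]
print(A[diagonal_indices(A, -1)])    # sub-diagonal: [4 9]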
def nwin1_bet_returns(w, odds):
    assert len(w) == len(odds)
    R = w.reshape(1, -1).repeat(len(w), 0)
    R *= eye(R.shape[0]) - 1.0
    ix = diag_indices_from(R)
    R[ix] = w * (odds - 1.0)
    return np.sum(R, 1)
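# Worked example with hypothetical numbers (assumes eye and diag_indices_from
# are imported from numpy, as the function body implies): stake 1 unit on each
# of three mutually exclusive outcomes at decimal odds 3.0, 4.0, 6.0. Row i of
# R holds the profit w[i]*(odds[i]-1) if outcome i wins minus the stakes lost
# on the other outcomes, so the row sums are the net return per possible winner.
import numpy as np

w = np.array([1.0, 1.0, 1.0])
odds = np.array([3.0, 4.0, 6.0])
print(nwin1_bet_returns(w, odds))   # [0. 1. 3.]: a win at odds 3.0 only breaks even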
def active_passive_collisions(active_tl, active_br, passive_tl, passive_br):
    '''
    Returns an NxN array, where element at [i, j] says if thing i's active
    hitbox crosses thing j's passive hitbox.

    An active hitbox isn't considered if any of its dimensions is
    not-positive.

    active/passive_tl/br must be arrays of shape (N, 2) - the boxes' corners
    in global coordinates

    See comment for passive_passive_collisions for longer explanation.
    The main difference is that we can't cheat here and do half the checks,
    then transpose, we need to do all checks.
    '''
    passive_tl_3d = passive_tl.reshape(1, -1, 2)
    passive_br_3d = passive_br.reshape(1, -1, 2)

    active_tl_3d = active_tl.reshape(-1, 1, 2)
    active_br_3d = active_br.reshape(-1, 1, 2)

    negcheck = numpy.logical_or(numpy.any(active_tl_3d > passive_br_3d, axis=2),
                                numpy.any(active_br_3d < passive_tl_3d, axis=2))

    legible = numpy.all(active_tl < active_br, axis=1).reshape(-1, 1)

    result = numpy.logical_and(numpy.logical_not(negcheck), legible)

    # Remove self collisions
    result[numpy.diag_indices_from(result)] = False

    return result
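# Tiny usage sketch with hypothetical boxes (uses the plain `numpy` name the
# function body expects): thing 0's active box overlaps thing 1's passive box,
# nothing else overlaps, and the diagonal (self collisions) is masked out.
import numpy

active_tl = numpy.array([[0.0, 0.0], [10.0, 10.0]])
active_br = numpy.array([[2.0, 2.0], [11.0, 11.0]])
passive_tl = numpy.array([[5.0, 5.0], [1.0, 1.0]])
passive_br = numpy.array([[6.0, 6.0], [3.0, 3.0]])
print(active_passive_collisions(active_tl, active_br, passive_tl, passive_br))
# [[False  True]
#  [False False]]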
def _test_solver(Solver, N=300, seed=1234, **kwargs): # Set up the solver. kernel = 1.0 * kernels.ExpSquaredKernel(1.0) solver = Solver(kernel, **kwargs) # Sample some data. np.random.seed(seed) x = np.atleast_2d(np.sort(10*np.random.randn(N))).T yerr = np.ones(N) solver.compute(x, yerr) # Build the matrix. K = kernel.get_value(x) K[np.diag_indices_from(K)] += yerr ** 2 # Check the determinant. sgn, lndet = np.linalg.slogdet(K) assert sgn == 1.0, "Invalid determinant" assert np.allclose(solver.log_determinant, lndet), "Incorrect determinant" y = np.sin(x[:, 0]) b0 = np.linalg.solve(K, y) b = solver.apply_inverse(y).flatten() assert np.allclose(b, b0) # Check the inverse. assert np.allclose(solver.apply_inverse(K), np.eye(N)), "Incorrect inverse"
def _R_matrix(p, odds):
    assert len(p) == len(odds)
    R = p.reshape(1, -1).repeat(len(p), 0)
    R *= eye(R.shape[0]) - 1.0
    ix = diag_indices_from(R)
    R[ix] = p * (odds - 1.0)
    return R
def test_pairplot(self): vars = ["x", "y", "z"] g = ag.pairplot(self.df) for ax in g.diag_axes: assert len(ax.patches) > 1 for i, j in zip(*np.triu_indices_from(g.axes, 1)): ax = g.axes[i, j] x_in = self.df[vars[j]] y_in = self.df[vars[i]] x_out, y_out = ax.collections[0].get_offsets().T npt.assert_array_equal(x_in, x_out) npt.assert_array_equal(y_in, y_out) for i, j in zip(*np.tril_indices_from(g.axes, -1)): ax = g.axes[i, j] x_in = self.df[vars[j]] y_in = self.df[vars[i]] x_out, y_out = ax.collections[0].get_offsets().T npt.assert_array_equal(x_in, x_out) npt.assert_array_equal(y_in, y_out) for i, j in zip(*np.diag_indices_from(g.axes)): ax = g.axes[i, j] nt.assert_equal(len(ax.collections), 0) g = ag.pairplot(self.df, hue="a") n = len(self.df.a.unique()) for ax in g.diag_axes: assert len(ax.lines) == n assert len(ax.collections) == n
def _pipe_as_flow(self, signal_packet):
    # Get signal_packet details
    hkey = signal_packet.keys()[0]
    adj = signal_packet[hkey]['data']

    # Add 1s along the diagonal to make positive definite
    adj[np.diag_indices_from(adj)] = 1

    # Compute eigenvalues and eigenvectors, ensure they are real
    eigval, eigvec = np.linalg.eig(adj)
    eigval = np.real(eigval)
    eigvec = np.real(eigvec)

    # Sort largest to smallest eigenvalue
    sorted_idx = np.argsort(eigval)[::-1]
    largest_idx = sorted_idx[0]
    centrality = np.abs(eigvec[:, largest_idx])
    centrality = centrality.reshape(-1, 1)

    # Dump into signal_packet
    new_packet = {}
    new_packet[hkey] = {
        'data': centrality,
        'meta': {
            'ax_0': signal_packet[hkey]['meta']['ax_0'],
            'time': signal_packet[hkey]['meta']['time']
        }
    }

    return new_packet
def compute_adjacency_matrix_images(coord, sigma=0.1):
    coord = coord.reshape(-1, 2)
    dist = cdist(coord, coord)
    A = np.exp(-dist / (sigma * np.pi)**2)
    A[np.diag_indices_from(A)] = 0
    return A
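# Usage sketch tying the two graph snippets together (assumes numpy as np and
# scipy.spatial.distance.cdist, which compute_adjacency_matrix_images relies
# on): build a Gaussian-kernel adjacency from random 2-D coordinates, then
# compute a leading-eigenvector centrality the way _pipe_as_flow does.
import numpy as np
from scipy.spatial.distance import cdist  # assumed import used by the function above

coord = np.random.rand(10, 2)
A = compute_adjacency_matrix_images(coord, sigma=0.1)

A[np.diag_indices_from(A)] = 1            # same diagonal fill as _pipe_as_flow
eigval, eigvec = np.linalg.eig(A)
centrality = np.abs(np.real(eigvec[:, np.argmax(np.real(eigval))]))
print(centrality.round(3))                # one centrality score per node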
def _calculateLk(self, G01, D):
    Bk = dot(G01.T, ddot(D, G01, left=True))
    Bk[NP.diag_indices_from(Bk)] += 1.0
    Lk = cholesky(Bk, lower=True, check_finite=False)
    return Lk
def lnlike(theta):
    m, b = theta[:2]
    K = user_kernel(np.exp(theta[2:]), dx)
    K[np.diag_indices_from(K)] += ye2
    return user_lnlike(y - (m * x + b), K)
def divide_diagonal_by_2(CHI0, div_fact=2.):
    CHI = CHI0.copy()
    CHI[np.diag_indices_from(CHI)] /= div_fact
    return CHI
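# One-line usage sketch (plain numpy): scale only the diagonal of a matrix
# while leaving the input untouched.
import numpy as np

CHI0 = np.full((3, 3), 4.0)
print(divide_diagonal_by_2(CHI0))   # diagonal becomes 2.0, off-diagonal stays 4.0
print(CHI0[0, 0])                   # 4.0 -- the original is not modified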
def fit(self, X, y, previous_K=None, newData=1, previous_mats=None): #the previous_K stores the gram mat from last step while previous_mat=[A,B] where A is the secondNorm of the x_i-x_j and B is x_i dot x_j """Fit Gaussian process regression model. Parameters ---------- X : array-like, shape = (n_samples, n_features) Training data y : array-like, shape = (n_samples, [n_output_dims]) Target values newData controls how many new datapoints has been added from last training(Used for batchMode) Returns ------- self : returns an instance of self. """ self.newData = 1 t1 = time.time() if self.kernel is None: # Use an RBF kernel as default self.logger.info('Training with default rbf kernel') self.kernel_ = C(1.0, constant_value_bounds="fixed") \ * RBF(1.0, length_scale_bounds="fixed") else: self.logger.info('Training with customized kernel') #if previous_K is None: self.kernel_ = clone(self.kernel) self._rng = check_random_state(self.random_state) X, y = check_X_y(X, y, multi_output=True, y_numeric=True) self.logger.info('shape of the x is m=%d , n=%d', X.shape[0], X.shape[1]) # Normalize target value self.logger.info('start to normalize y value...') t3 = time.time() if self.normalize_y: self._y_train_mean = np.mean(y, axis=0) # demean y y = y - self._y_train_mean else: self._y_train_mean = np.zeros(1) if np.iterable(self.alpha) \ and self.alpha.shape[0] != y.shape[0]: if self.alpha.shape[0] == 1: self.alpha = self.alpha[0] else: raise ValueError( "alpha must be a scalar or an array" " with same number of entries as y.(%d != %d)" % (self.alpha.shape[0], y.shape[0])) self.X_train_ = np.copy(X) if self.copy_X_train else X self.y_train_ = np.copy(y) if self.copy_X_train else y t4 = time.time() self.logger.info("finish normalizing Y in----------- %s seconds", str(t4 - t3)) if self.optimizer is not None and self.kernel_.n_dims > 0: self.logger.info('hyper parameter of the kernel will be optimized') self.logger.info('optimizing the hyper parameter of the kernel') # Choose hyperparameters based on maximizing the log-marginal # likelihood (potentially starting from several initial values) def obj_func(theta, eval_gradient=True): if eval_gradient: lml, grad = self.log_marginal_likelihood( theta, eval_gradient=True, pre_K=previous_K) return -lml, -grad else: return -self.log_marginal_likelihood(theta) # First optimize starting from theta specified in kernel optima = [(self._constrained_optimization(obj_func, self.kernel_.theta, self.kernel_.bounds))] # Additional runs are performed from log-uniform chosen initial # theta if self.n_restarts_optimizer > 0: if not np.isfinite(self.kernel_.bounds).all(): raise ValueError( "Multiple optimizer restarts (n_restarts_optimizer>0) " "requires that all bounds are finite.") bounds = self.kernel_.bounds for iteration in range(self.n_restarts_optimizer): theta_initial = \ self._rng.uniform(bounds[:, 0], bounds[:, 1]) optima.append( self._constrained_optimization(obj_func, theta_initial, bounds)) # Select result from run with minimal (negative) log-marginal # likelihood lml_values = list(map(itemgetter(1), optima)) self.kernel_.theta = optima[np.argmin(lml_values)][0] self.log_marginal_likelihood_value_ = -np.min(lml_values) t2 = time.time() self.logger.info( "finish opt hyper-para of kernel in----------- %s seconds", str(t2 - t4)) else: self.logger.info('hyper parameter of the kernel will be fixed') self.log_marginal_likelihood_value_ = \ self.log_marginal_likelihood(self.kernel_.theta) # Precompute quantities required for predictions which are independent # of actual 
query points #This is the most time consuming part? t6 = time.time() if (previous_K is not None): self.logger.info('use previous K_n and x_new to compute K_n+1') col = self.kernel_(X[0:X.shape[0] - newData, :], X[X.shape[0] - newData:X.shape[0], :]) K = np.concatenate((previous_K, col), axis=1) row = np.concatenate( (col.T, self.kernel_(X[X.shape[0] - newData:X.shape[0], :], X[X.shape[0] - newData:X.shape[0], :])), axis=1) K = np.concatenate((K, row), axis=0) self.K_return = K elif (previous_mats is not None): #update A A = previous_mats[0] B = previous_mats[1] trainInd = range(int(X.shape[0]) - newData) sampleInd = range(int(X.shape[0]) - newData, int(X.shape[0])) Acol = cdist(X[trainInd, :], X[sampleInd, :], metric='sqeuclidean') Aone = cdist(X[sampleInd, :], X[sampleInd, :], metric='sqeuclidean') Arow = np.concatenate((Acol.transpose(), Aone), axis=1) #print('aaaa'+str(A.shape)) #print('acol'+str(Acol.shape)) A = np.concatenate((A, Acol), axis=1) A = np.concatenate((A, Arow), axis=0) #update B Bcol = np.inner(X[trainInd, :], X[sampleInd, :]) Bone = np.inner(X[sampleInd, :], X[sampleInd, :]) Brow = np.concatenate((Bcol.transpose(), Bone), axis=1) B = np.concatenate((B, Bcol), axis=1) B = np.concatenate((B, Brow), axis=0) #compute gram #note theta are in log format thetas = self.kernel_.theta thetas = np.exp(thetas) #rbf part krbf = np.exp(A * (-0.5) / (thetas[1]**2)) np.fill_diagonal(krbf, 1) krbf = thetas[0] * krbf #dot product part kdot = B + thetas[2]**2 kdot = kdot * thetas[3] #note that we changed custom kernel, thetas[4] no longer exist #self.K_return=krbf+kdot+np.ones(kdot.shape)*thetas[4] self.K_return = krbf + kdot K = self.K_return #also save [A,B] self.mats_return = [A, B] else: K = self.kernel_(self.X_train_) self.K_return = K A = pdist(self.X_train_, metric='sqeuclidean' ) #this is the flatten upper triangular ||xi-xj||_2 A = squareform(A) B = np.inner(self.X_train_, self.X_train_) self.mats_return = [A, B] K[np.diag_indices_from(K)] += self.alpha t7 = time.time() self.logger.info("compute matrix K takes----------- %s seconds", str(t7 - t6)) try: self.L_ = cholesky(K, lower=True) # Line 2 except np.linalg.LinAlgError as exc: exc.args = ("The kernel, %s, is not returning a " "positive definite matrix. Try gradually " "increasing the 'alpha' parameter of your " "GaussianProcessRegressor estimator." % self.kernel_, ) + exc.args raise self.alpha_ = cho_solve((self.L_, True), self.y_train_) # Line 3 t5 = time.time() #print("retrain in (compute matrix K and invers and det) takes----------- seconds:"+ str(t5-t6)) self.logger.info("compute K.inv*t takes----------- %s seconds", str(t5 - t7)) self.logger.info('training ends in %s seconds-----------', str(t5 - t1)) return self
def add_data_async(self, fX, fT, istart=0, icount=np.inf, fHH=None, fHT=None): """Version of `add_data()` with asyncronous I/O. See `add_data()` for reference. Spawns new processes using Python's `multiprocessing` module, and requires more memory than non-async version. """ # initialize assert len(self.nnet.neurons) > 0, "Add neurons to ELM before using it" X, T = self._checkdata(fX, fT) N = X.shape[0] # TODO: adapt for GPU solver _prepare_fHH(fHH, fHT, self.nnet, self.precision) # custom range adjustments icount = min(icount, N - istart) nb = int(np.ceil(float(icount) / self.batch)) # weighted classification initialization if self.classification == "wc" and self.wc is None: ns = np.zeros((self.nnet.outputs, )) for b in xrange(nb): # batch sum is much faster start = b * self.batch + istart stop = min((b + 1) * self.batch + istart, icount + istart) ns += T[start:stop].sum(axis=0) ns = ns.astype(self.precision) self.wc = ns.sum( ) / ns # class weights normalized to number of samples # close X and T files opened by _checkdata() h5 = self.opened_hdf5.pop() h5.close() h5 = self.opened_hdf5.pop() h5.close() # start async reader and writer for HDF5 files qX_in = mp.Queue() qX_out = mp.Queue(1) readerX = mp.Process(target=_ireader, args=(fX, qX_in, qX_out)) readerX.daemon = True readerX.start() qT_in = mp.Queue() qT_out = mp.Queue(1) readerT = mp.Process(target=_ireader, args=(fT, qT_in, qT_out)) readerT.daemon = True readerT.start() # main loop over all the data t = time() t0 = time() wc_vector = None for b in xrange(0, nb + 1): start_next = b * self.batch + istart stop_next = min((b + 1) * self.batch + istart, icount + istart) # prefetch data qX_in.put((start_next, stop_next)) # asyncronous reading of next data batch qT_in.put((start_next, stop_next)) if b > 0: # first iteration only prefetches data Xb = qX_out.get() Tb = qT_out.get() if self.classification == "wc": wc_vector = self.wc[np.where( Tb == 1)[1]] # weights for samples in the batch self.nnet.add_batch(Xb, Tb, wc_vector) # report time eta = int(((time() - t0) / (b + 1)) * (nb - b - 1)) if time() - t > self.tprint: print("processing batch %d/%d, eta %d:%02d:%02d" % (b + 1, nb, eta / 3600, (eta % 3600) / 60, eta % 60)) t = time() # close async reader and writer readerX.join() readerT.join() # if storing output to disk if fHH is not None and fHT is not None: HH, HT = self.nnet.get_corr() HH[np.diag_indices_from( HH)] -= self.nnet.norm # norm is already included _write_fHH(fHH, fHT, HH, HT)
def __init__(self, transition_models, transition_prob=None): # save attributes self.transition_models = transition_models self.transition_prob = transition_prob num_patterns = len(transition_models) # first stack all transition models first_states = [] last_states = [] for p, tm in enumerate(self.transition_models): # set/update the probabilities, states and pointers offset = 0 if p == 0: # for the first pattern, just use the TM arrays states = tm.states pointers = tm.pointers probabilities = tm.probabilities else: # for all consecutive patterns, stack the TM arrays after # applying an offset # Note: len(pointers) = len(states) + 1, because of the CSR # format of the TM (please see ml.hmm.TransitionModel) offset = len(pointers) - 1 # states: offset = length of the pointers - 1 states = np.hstack((states, tm.states + len(pointers) - 1)) # pointers: offset = current maximum of the pointers # start = tm.pointers[1:] pointers = np.hstack((pointers, tm.pointers[1:] + max(pointers))) # probabilities: just stack them probabilities = np.hstack((probabilities, tm.probabilities)) # save the first/last states first_states.append(tm.state_space.first_states[0] + offset) last_states.append(tm.state_space.last_states[-1] + offset) # retrieve a dense representation in order to add transitions # TODO: operate directly on the sparse representation? states, prev_states, probabilities = self.make_dense(states, pointers, probabilities) # translate float transition_prob value to transition_prob matrix if isinstance(transition_prob, float) and transition_prob: # create a pattern transition probability matrix self.transition_prob = np.ones((num_patterns, num_patterns)) # transition to other patterns self.transition_prob *= transition_prob / (num_patterns - 1) # transition to same pattern diag = np.diag_indices_from(self.transition_prob) self.transition_prob[diag] = 1. - transition_prob else: self.transition_prob = transition_prob # update/add transitions between patterns if self.transition_prob is not None and num_patterns > 1: new_states = [] new_prev_states = [] new_probabilities = [] for p in range(num_patterns): # indices of states/prev_states/probabilities idx = np.logical_and(np.in1d(prev_states, last_states[p]), np.in1d(states, first_states[p])) # transition probability prob = probabilities[idx] # update transitions to same pattern with new probability probabilities[idx] *= self.transition_prob[p, p] # distribute that part among all other patterns for p_ in np.setdiff1d(range(num_patterns), p): idx_ = np.logical_and( np.in1d(prev_states, last_states[p_]), np.in1d(states, first_states[p_])) # make sure idx and idx_ have same length if len(np.nonzero(idx)[0]) != len(np.nonzero(idx_)[0]): raise ValueError('Cannot add transition between ' 'patterns with different number of ' 'entering/exiting states.') # use idx for the states and idx_ for prev_states new_states.extend(states[idx]) new_prev_states.extend(prev_states[idx_]) new_probabilities.extend(prob * self.transition_prob[p, p_]) # extend the arrays by these new transitions states = np.append(states, new_states) prev_states = np.append(prev_states, new_prev_states) probabilities = np.append(probabilities, new_probabilities) # make the transitions sparse transitions = self.make_sparse(states, prev_states, probabilities) # instantiate a TransitionModel super(MultiPatternTransitionModel, self).__init__(*transitions)
def plot_correlation_matrix(df, columns, figsize=(8,8), annot=True, errors=True, nbs=100,# fmt='.2g', mask_diagonal=True, mask_upper_triangle=True): ''' Create a plot of the correlation matrix for (continous) data columns (or features) of a dataframe (df) @params: df - Pandas data frame columns - Columns of data frame to include in matrix annot - Should the value of the correlation appear in the cell? errors - Calculate errors via bootstrap resampling nbs - Number of bootstrap realisations #fmt - Format for annotations mask_diagonral - Mask the matrix diagonal (all 1's) mask_upper_triangle - Mask the (copy) upper triangle ''' # Calculate correlation coefficients corr = df[columns].corr() if annot and errors: # Calculate errors via bootstrap std = _bootstrap_correlation_errors(df, columns, n=nbs) notes = [] for i in range(len(columns)): # Create annotations for heatmap note = [] for j in range(len(columns)): note.append('$%.2g \pm %.2g$'%(np.array(corr)[i, j], std[i, j])) notes.append(note) notes = pd.DataFrame(notes, index=corr.index, columns=corr.columns) # Apply mask if mask_diagonal and mask_upper_triangle: corr.drop(labels=columns[0], axis=0, inplace=True) # Remove first row corr.drop(labels=columns[-1], axis=1, inplace=True) # Remove last column if annot and errors: notes.drop(labels=columns[0], axis=0, inplace=True) # Remove first row notes.drop(labels=columns[-1], axis=1, inplace=True) # Remove last column # Create mask mask = np.zeros_like(corr, dtype=bool) if mask_upper_triangle and mask_diagonal: # k=1 does diagonal offset from centre mask[np.triu_indices_from(mask, k=1)] = True elif mask_upper_triangle: mask[np.triu_indices_from(mask, k=1)] = True elif mask_diagonal: mask[np.diag_indices_from(mask)] = True if annot and errors: fmt = '' else: fmt='.2g' notes = annot # Make the plot plt.style.use('seaborn-white') plt.figure(figsize=figsize) cmap = sns.diverging_palette(220, 10, as_cmap=True) g = sns.heatmap(corr, vmin=-1., vmax=1., cmap=cmap, mask=mask, linewidths=.5, annot=notes, fmt=fmt, square=True, cbar=False, ) # Centre y-axis ticks g.set_yticklabels(labels=g.get_yticklabels(), va='center')
def getK(self):
    x = self.X
    K = (self.kernelX_module(x) + self.linear_kernelY_1_module(x)).evaluate()
    K[np.diag_indices_from(K)] += self.getNoise()
    return K
def fit(self, X, y): """Fit Gaussian process regression model. Args: X: Training data y: Target values Returns: self : returns an instance of self. """ self.kernel_ = clone(self.kernel) self._rng = check_random_state(self.random_state) #X, y = check_X_y(X, y, multi_output=True, y_numeric=True) # Normalize target value if self.normalize_y: self._y_train_mean = np.mean(y, axis=0) # demean y y = y - self._y_train_mean else: self._y_train_mean = np.zeros(1) if np.iterable(self.alpha) \ and self.alpha.shape[0] != y.shape[0]: if self.alpha.shape[0] == 1: self.alpha = self.alpha[0] else: raise ValueError( "alpha must be a scalar or an array" " with same number of entries as y.(%d != %d)" % (self.alpha.shape[0], y.shape[0])) self.X_train_ = np.copy(X) if self.copy_X_train else X self.y_train_ = np.copy(y) if self.copy_X_train else y if self.optimizer is not None and self.kernel_.n_dims > 0: # Choose hyperparameters based on maximizing the log-marginal # likelihood (potentially starting from several initial values) def obj_func(theta, eval_gradient=True): if eval_gradient: lml, grad = self.log_marginal_likelihood( theta, eval_gradient=True) return -lml, -grad else: return -self.log_marginal_likelihood(theta) # First optimize starting from theta specified in kernel optima = [(self._constrained_optimization(obj_func, self.kernel_.theta, self.kernel_.bounds))] # Additional runs are performed from log-uniform chosen initial # theta if self.n_restarts_optimizer > 0: if not np.isfinite(self.kernel_.bounds).all(): raise ValueError( "Multiple optimizer restarts (n_restarts_optimizer>0) " "requires that all bounds are finite.") bounds = self.kernel_.bounds for iteration in range(self.n_restarts_optimizer): theta_initial = \ self._rng.uniform(bounds[:, 0], bounds[:, 1]) optima.append( self._constrained_optimization(obj_func, theta_initial, bounds)) # Select result from run with minimal (negative) log-marginal # likelihood lml_values = list(map(itemgetter(1), optima)) self.kernel_.theta = optima[np.argmin(lml_values)][0] self.log_marginal_likelihood_value_ = -np.min(lml_values) else: self.log_marginal_likelihood_value_ = \ self.log_marginal_likelihood(self.kernel_.theta) # Precompute quantities required for predictions which are independent # of actual query points K = self.kernel_(self.X_train_) K[np.diag_indices_from(K)] += self.alpha try: self.L_ = cholesky(K, lower=True) # Line 2 except np.linalg.LinAlgError as exc: exc.args = ("The kernel, %s, is not returning a " "positive definite matrix. Try gradually " "increasing the 'alpha' parameter of your " "GaussianProcessRegressor estimator." % self.kernel_, ) + exc.args raise self.alpha_ = cho_solve((self.L_, True), self.y_train_) # Line 3 return self
def fit(self, X, y): """Fit Gaussian process regression model. Parameters ---------- X : array-like of shape (n_samples, n_features) or list of object Feature vectors or other representations of training data. y : array-like of shape (n_samples,) or (n_samples, n_targets) Target values Returns ------- self : returns an instance of self. """ if self.kernel is None: # Use an RBF kernel as default self.kernel_ = C(1.0, constant_value_bounds="fixed") \ * RBF(1.0, length_scale_bounds="fixed") else: self.kernel_ = clone(self.kernel) self._rng = check_random_state(self.random_state) if self.kernel_.requires_vector_input: X, y = self._validate_data(X, y, multi_output=True, y_numeric=True, ensure_2d=True, dtype="numeric") else: X, y = self._validate_data(X, y, multi_output=True, y_numeric=True, ensure_2d=False, dtype=None) # Normalize target value if self.normalize_y: self._y_train_mean = np.mean(y, axis=0) self._y_train_std = _handle_zeros_in_scale(np.std(y, axis=0), copy=False) # Remove mean and make unit variance y = (y - self._y_train_mean) / self._y_train_std else: self._y_train_mean = np.zeros(1) self._y_train_std = 1 if np.iterable(self.alpha) \ and self.alpha.shape[0] != y.shape[0]: if self.alpha.shape[0] == 1: self.alpha = self.alpha[0] else: raise ValueError( "alpha must be a scalar or an array " "with same number of entries as y. (%d != %d)" % (self.alpha.shape[0], y.shape[0])) self.X_train_ = np.copy(X) if self.copy_X_train else X self.y_train_ = np.copy(y) if self.copy_X_train else y if self.optimizer is not None and self.kernel_.n_dims > 0: # Choose hyperparameters based on maximizing the log-marginal # likelihood (potentially starting from several initial values) def obj_func(theta, eval_gradient=True): if eval_gradient: lml, grad = self.log_marginal_likelihood( theta, eval_gradient=True, clone_kernel=False) return -lml, -grad else: return -self.log_marginal_likelihood(theta, clone_kernel=False) # First optimize starting from theta specified in kernel optima = [(self._constrained_optimization(obj_func, self.kernel_.theta, self.kernel_.bounds))] # Additional runs are performed from log-uniform chosen initial # theta if self.n_restarts_optimizer > 0: if not np.isfinite(self.kernel_.bounds).all(): raise ValueError( "Multiple optimizer restarts (n_restarts_optimizer>0) " "requires that all bounds are finite.") bounds = self.kernel_.bounds for iteration in range(self.n_restarts_optimizer): theta_initial = \ self._rng.uniform(bounds[:, 0], bounds[:, 1]) optima.append( self._constrained_optimization(obj_func, theta_initial, bounds)) # Select result from run with minimal (negative) log-marginal # likelihood lml_values = list(map(itemgetter(1), optima)) self.kernel_.theta = optima[np.argmin(lml_values)][0] self.kernel_._check_bounds_params() self.log_marginal_likelihood_value_ = -np.min(lml_values) else: self.log_marginal_likelihood_value_ = \ self.log_marginal_likelihood(self.kernel_.theta, clone_kernel=False) # Precompute quantities required for predictions which are independent # of actual query points K = self.kernel_(self.X_train_) K[np.diag_indices_from(K)] += self.alpha try: self.L_ = cholesky(K, lower=True) # Line 2 # self.L_ changed, self._K_inv needs to be recomputed self._K_inv = None except np.linalg.LinAlgError as exc: exc.args = ("The kernel, %s, is not returning a " "positive definite matrix. Try gradually " "increasing the 'alpha' parameter of your " "GaussianProcessRegressor estimator." 
% self.kernel_, ) + exc.args raise self.alpha_ = cho_solve((self.L_, True), self.y_train_) # Line 3 return self
def log_marginal_likelihood(self, theta=None, eval_gradient=False, clone_kernel=True): """Returns log-marginal likelihood of theta for training data. Parameters ---------- theta : array-like of shape (n_kernel_params,) default=None Kernel hyperparameters for which the log-marginal likelihood is evaluated. If None, the precomputed log_marginal_likelihood of ``self.kernel_.theta`` is returned. eval_gradient : bool, default=False If True, the gradient of the log-marginal likelihood with respect to the kernel hyperparameters at position theta is returned additionally. If True, theta must not be None. clone_kernel : bool, default=True If True, the kernel attribute is copied. If False, the kernel attribute is modified, but may result in a performance improvement. Returns ------- log_likelihood : float Log-marginal likelihood of theta for training data. log_likelihood_gradient : ndarray of shape (n_kernel_params,), optional Gradient of the log-marginal likelihood with respect to the kernel hyperparameters at position theta. Only returned when eval_gradient is True. """ if theta is None: if eval_gradient: raise ValueError( "Gradient can only be evaluated for theta!=None") return self.log_marginal_likelihood_value_ if clone_kernel: kernel = self.kernel_.clone_with_theta(theta) else: kernel = self.kernel_ kernel.theta = theta if eval_gradient: K, K_gradient = kernel(self.X_train_, eval_gradient=True) else: K = kernel(self.X_train_) K[np.diag_indices_from(K)] += self.alpha try: L = cholesky(K, lower=True) # Line 2 except np.linalg.LinAlgError: return (-np.inf, np.zeros_like(theta)) \ if eval_gradient else -np.inf # Support multi-dimensional output of self.y_train_ y_train = self.y_train_ if y_train.ndim == 1: y_train = y_train[:, np.newaxis] alpha = cho_solve((L, True), y_train) # Line 3 # Compute log-likelihood (compare line 7) log_likelihood_dims = -0.5 * np.einsum("ik,ik->k", y_train, alpha) log_likelihood_dims -= np.log(np.diag(L)).sum() log_likelihood_dims -= K.shape[0] / 2 * np.log(2 * np.pi) log_likelihood = log_likelihood_dims.sum(-1) # sum over dimensions if eval_gradient: # compare Equation 5.9 from GPML tmp = np.einsum("ik,jk->ijk", alpha, alpha) # k: output-dimension tmp -= cho_solve((L, True), np.eye(K.shape[0]))[:, :, np.newaxis] # Compute "0.5 * trace(tmp.dot(K_gradient))" without # constructing the full matrix tmp.dot(K_gradient) since only # its diagonal is required log_likelihood_gradient_dims = \ 0.5 * np.einsum("ijl,jik->kl", tmp, K_gradient) log_likelihood_gradient = log_likelihood_gradient_dims.sum(-1) if eval_gradient: return log_likelihood, log_likelihood_gradient else: return log_likelihood
reveal_type(np.index_exp[0:1])  # E: Tuple[builtins.slice]
reveal_type(np.index_exp[0:1, None:3])  # E: Tuple[builtins.slice, builtins.slice]
reveal_type(np.index_exp[0, 0:1, ..., [0, 1, 3]])  # E: Tuple[Literal[0]?, builtins.slice, builtins.ellipsis, builtins.list[builtins.int]]

reveal_type(np.s_[0:1])  # E: builtins.slice
reveal_type(np.s_[0:1, None:3])  # E: Tuple[builtins.slice, builtins.slice]
reveal_type(np.s_[0, 0:1, ..., [0, 1, 3]])  # E: Tuple[Literal[0]?, builtins.slice, builtins.ellipsis, builtins.list[builtins.int]]

reveal_type(np.ix_(AR_LIKE_b))  # E: tuple[numpy.ndarray[Any, numpy.dtype[numpy.bool_]]]
reveal_type(np.ix_(AR_LIKE_i, AR_LIKE_f))  # E: tuple[numpy.ndarray[Any, numpy.dtype[{double}]]]
reveal_type(np.ix_(AR_i8))  # E: tuple[numpy.ndarray[Any, numpy.dtype[{int64}]]]

reveal_type(np.fill_diagonal(AR_i8, 5))  # E: None

reveal_type(np.diag_indices(4))  # E: tuple[numpy.ndarray[Any, numpy.dtype[{int_}]]]
reveal_type(np.diag_indices(2, 3))  # E: tuple[numpy.ndarray[Any, numpy.dtype[{int_}]]]

reveal_type(np.diag_indices_from(AR_i8))  # E: tuple[numpy.ndarray[Any, numpy.dtype[{int_}]]]
Biajb -= np.einsum('ij,ab->iajb', moF[:ndocc:, :ndocc], np.diag(np.ones(nvirt))) Biajb += 4 * MO[:, ndocc:, :ndocc, ndocc:] Biajb -= MO[:, ndocc:, :ndocc, ndocc:].swapaxes(0, 2) Biajb -= MO[:, :ndocc, ndocc:, ndocc:].swapaxes(1, 2) Biajb *= 4 # Invert B, (o^3 v^3) Binv = np.linalg.inv(Biajb.reshape(ndocc * nvirt, -1)).reshape(ndocc, nvirt, ndocc, nvirt) # Build orbital rotation matrix x = np.einsum('iajb,ia->jb', Binv, gn) U = np.zeros_like(hf.Ca) U[:ndocc, ndocc:] = x U[ndocc:, :ndocc] = -x.T U += 0.5 * np.dot(U, U) U[np.diag_indices_from(hf.A)] += 1 # Easy access to Schmidt orthogonalization U, r = np.linalg.qr(U.T) # Rotate and set orbitals C = hf.Ca.dot(U) hf.set_Cleft(C) iter_type = 'SOSCF' print('Total time taken for SCF iterations: %.3f seconds \n' % (time.time() - t)) print('Final SCF energy: %.8f hartree' % hf.scf_e) # Compare to Psi4 SCF_E_psi = psi4.energy('SCF')
def log_marginal_likelihood(self, theta=None, eval_gradient=False, pre_K=None): """Returns log-marginal likelihood of theta for training data. Parameters ---------- theta : array-like, shape = (n_kernel_params,) or None Kernel hyperparameters for which the log-marginal likelihood is evaluated. If None, the precomputed log_marginal_likelihood of ``self.kernel_.theta`` is returned. eval_gradient : bool, default: False If True, the gradient of the log-marginal likelihood with respect to the kernel hyperparameters at position theta is returned additionally. If True, theta must not be None. Returns ------- log_likelihood : float Log-marginal likelihood of theta for training data. log_likelihood_gradient : array, shape = (n_kernel_params,), optional Gradient of the log-marginal likelihood with respect to the kernel hyperparameters at position theta. Only returned when eval_gradient is True. """ t1 = time.time() if theta is None: if eval_gradient: raise ValueError( "Gradient can only be evaluated for theta!=None") return self.log_marginal_likelihood_value_ kernel = self.kernel_.clone_with_theta(theta) #kernel.dists_mat=self.kernel_.dists_mat if eval_gradient: #use incremental way of computing K # if(previous_K is not None): # self.logger.info('In computing log marginal likelihood:use previous K_n and x_new to compute K_n+1'); # #pass previous K to kernel function kernel.__call__ to compute new K # K= np.concatenate((previous_K,self.kernel_(X[0:X.shape[0]-1,:],X[[X.shape[0]-1],:])),axis=1); # row=np.concatenate((self.kernel_(X[0:X.shape[0]-1,:],X[[X.shape[0]-1],:]).T,self.kernel_(X[[X.shape[0]-1],:],X[[X.shape[0]-1],:])),axis=1) # K=np.concatenate((K,row),axis=0) # self.K_return=K; K, K_gradient = kernel(self.X_train_, eval_gradient=True) #self.kernel_.dists_mat=kernel.dists_mat; else: if (pre_K is None): K = kernel(self.X_train_) else: #fast compute K note that after fit() of the model has been called, the A,B returned from A,B have already have the correct size. we just need to compute the K(do not need to update,A,B like we did in fit.) 
A = pre_K[0] B = pre_K[1] #compute gram #note theta are in log format thetas = np.exp(theta) #rbf part krbf = np.exp(A * (-0.5) / (thetas[1]**2)) np.fill_diagonal(krbf, 1) krbf = thetas[0] * krbf #dot product part kdot = B + thetas[2]**2 kdot = kdot * thetas[3] #note that we changed custom kernel, thetas[4] no longer exist #self.K_return=krbf+kdot+np.ones(kdot.shape)*thetas[4] K = krbf + kdot #also save [A,B] K[np.diag_indices_from(K)] += self.alpha t2 = time.time() self.logger.info('compute K in logLikelihood in %s sec', str(t2 - t1)) try: L = cholesky(K, lower=True) # Line 2 except np.linalg.LinAlgError: return (-np.inf, np.zeros_like(theta)) \ if eval_gradient else -np.inf # Support multi-dimensional output of self.y_train_ y_train = self.y_train_ if y_train.ndim == 1: y_train = y_train[:, np.newaxis] alpha = cho_solve((L, True), y_train) # Line 3 # Compute log-likelihood (compare line 7) log_likelihood_dims = -0.5 * np.einsum("ik,ik->k", y_train, alpha) log_likelihood_dims -= np.log(np.diag(L)).sum() log_likelihood_dims -= K.shape[0] / 2 * np.log(2 * np.pi) log_likelihood = log_likelihood_dims.sum(-1) # sum over dimensions if eval_gradient: # compare Equation 5.9 from GPML tmp = np.einsum("ik,jk->ijk", alpha, alpha) # k: output-dimension tmp -= cho_solve((L, True), np.eye(K.shape[0]))[:, :, np.newaxis] # Compute "0.5 * trace(tmp.dot(K_gradient))" without # constructing the full matrix tmp.dot(K_gradient) since only # its diagonal is required log_likelihood_gradient_dims = \ 0.5 * np.einsum("ijl,ijk->kl", tmp, K_gradient) log_likelihood_gradient = log_likelihood_gradient_dims.sum(-1) t3 = time.time() self.logger.info('logLikelihood computation finished in %s sec', str(t3 - t1)) if eval_gradient: return log_likelihood, log_likelihood_gradient else: return log_likelihood
import numpy as np
import Graphics as artist
import matplotlib.pyplot as plt

from mpl_toolkits.axes_grid1 import make_axes_locatable

plt.xkcd()
data = np.load('interaction_matrix-2015-11-28-w-deduped-curated-drug-names.npy')

print data[:10, 3]
data[np.diag_indices_from(data)] /= 2.  # Forgot to tell you this- MC
data = np.log(1. + data)  # log because decreasing marginal importance
data = (data - data.min(axis=1)) / (data.max(axis=1) - data.min(axis=1))

cutoff = 10
eig_vals, eig_vecs = np.linalg.eigh(data)  # Wrong function
idx = np.argsort(eig_vals)  # sorting the eigenvalues
idx = idx[::-1]  # in descending order

# sorting eigenvectors according to the sorted eigenvalues
eig_vecs = eig_vecs[:, idx]
eig_vecs = eig_vecs[:, range(cutoff)]
eig_vals = eig_vals[idx]  # sorting eigenvalues
score = np.dot(eig_vecs.T, data)  # projection of the data in the new space
eig_vals /= eig_vals.max()

print eig_vals[:10]
fig, axs = plt.subplots(nrows=1, ncols=3, figsize=(14, 6.5))

#-- Raw data
def test_krr_gaussian_local_cmat(): test_dir = os.path.dirname(os.path.realpath(__file__)) # Parse file containing PBE0/def2-TZVP heats of formation and xyz filenames data = get_energies(test_dir + "/data/hof_qm7.txt") # Generate a list of qml.data.Compound() objects" mols = [] for xyz_file in sorted(data.keys())[:1000]: # Initialize the qml.data.Compound() objects mol = qml.data.Compound(xyz=test_dir + "/qm7/" + xyz_file) # Associate a property (heat of formation) with the object mol.properties = data[xyz_file] # This is a Molecular Coulomb matrix sorted by row norm mol.generate_atomic_coulomb_matrix(size=23, sorting="row-norm") mols.append(mol) # Shuffle molecules np.random.seed(666) np.random.shuffle(mols) # Make training and test sets n_test = 100 n_train = 200 training = mols[:n_train] test = mols[-n_test:] X = np.concatenate([mol.representation for mol in training]) Xs = np.concatenate([mol.representation for mol in test]) N = np.array([mol.natoms for mol in training]) Ns = np.array([mol.natoms for mol in test]) # List of properties Y = np.array([mol.properties for mol in training]) Ys = np.array([mol.properties for mol in test]) # Set hyper-parameters sigma = 724.0 llambda = 10**(-6.5) K = get_local_kernels_gaussian(X, X, N, N, [sigma])[0] assert np.allclose(K, K.T), "Error in local Gaussian kernel symmetry" K_test = np.loadtxt(test_dir + "/data/K_local_gaussian.txt") assert np.allclose( K, K_test), "Error in local Gaussian kernel (vs. reference)" K_test = get_atomic_kernels_gaussian(training, training, [sigma])[0] assert np.allclose(K, K_test), "Error in local Gaussian kernel (vs. wrapper)" # Solve alpha K[np.diag_indices_from(K)] += llambda alpha = cho_solve(K, Y) # Calculate prediction kernel Ks = get_local_kernels_gaussian(Xs, X, Ns, N, [sigma])[0] Ks_test = np.loadtxt(test_dir + "/data/Ks_local_gaussian.txt") # Somtimes a few coulomb matrices differ because of parallel sorting and numerical error # Allow up to 5 molecules to differ from the supplied reference. differences_count = len(set(np.where(Ks - Ks_test > 1e-7)[0])) assert differences_count < 5, "Error in local Laplacian kernel (vs. reference)" # assert np.allclose(Ks, Ks_test), "Error in local Gaussian kernel (vs. reference)" Ks_test = get_atomic_kernels_gaussian(test, training, [sigma])[0] assert np.allclose(Ks, Ks_test), "Error in local Gaussian kernel (vs. wrapper)" Yss = np.dot(Ks, alpha) mae = np.mean(np.abs(Ys - Yss)) print(mae) assert abs(19.0 - mae) < 1.0, "Error in local Gaussian kernel-ridge regression"
def run_simulation(sim_params: ParamDict, _current_results_dir: Path) -> SimResults: from matplotlib.gridspec import GridSpec from simulator.params import PlasticityTypeItoE, PlasticityTypeEtoI, IsPlastic import numpy as np from time import time from simulator.utils import allocate_aligned from simulator.rates import train_network, estimate_responses from simulator.setup import make_afferents, make_synapses from simulator.params import AllParameters from simulator.params import VectorE, VectorI, ArrayIE, ArrayEE, ArrayEI from simulator.plasticity import MomentEstimate from simulator.measure import orientation_selectivity_index, compute_syn_current_spearmanr, compute_response_similarity t0 = time() dtype = AllParameters.float_type params = AllParameters(**sim_params) inp = params.inp ni = params.numint ng = params.ng pl = params.pl n_e = ng.exc.n_per_axis ** ng.n_d n_i = n_e // 8 dt_tau_e = ni.dt / ng.exc.tau_m dt_tau_i = ni.dt / ng.inh.tau_m max_dr_dt_exc = dt_tau_e * ni.max_dr max_dr_dt_inh = dt_tau_i * ni.max_dr if pl.is_plastic == IsPlastic.INH: pl.eta_e = dtype(0) elif pl.is_plastic == IsPlastic.EXC: pl.eta_i = dtype(0) elif pl.is_plastic == IsPlastic.NEITHER: pl.eta_e = pl.eta_i = dtype(0) ni.n_trials = 0 aff_arrays = make_afferents(ng.n_d, ng.exc.n_per_axis, inp.n_stimuli, inp.exc.bg_input, inp.exc.peak_stimulus, inp.vonmises_kappa, PLOT_AFFERENTS) if params.sy.e2e.w_total > 0: corr_kappa = inp.vonmises_kappa / 4 tmp_arrays = make_afferents(ng.n_d, ng.exc.n_per_axis, inp.n_stimuli, inp.exc.bg_input, inp.exc.peak_stimulus, corr_kappa, PLOT_AFFERENTS) flattened = np.zeros((n_e, inp.n_stimuli ** ng.n_d), dtype=dtype) for n in range(n_e): flattened[n, :] = tmp_arrays.afferents[..., n].flatten() flattened = flattened.astype(dtype) target_correlations = np.corrcoef(flattened) target_correlations = ArrayEE(target_correlations.astype(dtype)) else: target_correlations = None sya = make_synapses(params, n_e, n_i, target_correlations, plot_weight_hist=PLOT_WEIGHT_HIST) wei_init = sya.wei.copy() wie_init = sya.wie.copy() pop_in = aff_arrays.afferents.sum(axis=-1) print(f"Sharp: {inp.sharp_input} Avg in: {pop_in.mean():.1f}, std: {pop_in.std():.1f}") per_exc = aff_arrays.afferents.sum(axis=(0, 1, 2)) print(f"Per neuron. Avg in: {per_exc.mean()}, std: {per_exc.std():.1f}") recording_re = allocate_aligned((n_e, ni.max_steps), dtype=dtype) recording_ri = allocate_aligned((n_i, ni.max_steps), dtype=dtype) m_phases = ni.n_trials // ni.every_n assert ni.n_trials % ni.every_n == 0 if ni.n_trials: correlations_ee = np.empty((n_e, n_e, m_phases+1)) else: correlations_ee = np.empty((n_e, n_e, 1)) responses_exc, responses_inh, exc_in, inh_in = estimate_responses( inp.n_stimuli, aff_arrays.locations_idx, aff_arrays.afferents, inp.inh.bg_input, n_e, n_i, pl.rho0, sya, dt_tau_e, dt_tau_i, params.ng.r_max, max_dr_dt_exc, max_dr_dt_inh, ni.max_steps, recording_re, recording_ri ) print(f"Maximum exc. 
rate {np.max(responses_exc):.2f}") flattened = np.zeros((n_e, inp.n_stimuli ** ng.n_d), dtype=dtype) for n in range(n_e): flattened[n, :] = responses_exc[..., n].flatten() corr = np.corrcoef(flattened) correlations_ee[..., 0] = corr if pl.compute_gradient_angles and pl.eta_e > 0: angles_ie = allocate_aligned((inp.n_stimuli ** params.ng.n_d * ni.n_trials), np.NaN, dtype=dtype) else: angles_ie = None if pl.compute_gradient_angles and pl.eta_i > 0: angles_ei = allocate_aligned((inp.n_stimuli**params.ng.n_d * ni.n_trials), np.NaN, dtype=dtype) else: angles_ei = None if pl.plasticity_type_ei == PlasticityTypeItoE.GRADIENT and ni.n_trials: adam_ei = MomentEstimate( ArrayEI(allocate_aligned(sya.wei.shape, dtype=sya.wei.dtype)), ArrayEI(allocate_aligned(sya.wei.shape, dtype=sya.wei.dtype)), ) else: adam_ei = None if pl.plasticity_type_ie == PlasticityTypeEtoI.GRADIENT and ni.n_trials: adam_ie = MomentEstimate( ArrayIE(allocate_aligned(sya.wie.shape, dtype=sya.wie.dtype)), ArrayIE(allocate_aligned(sya.wie.shape, dtype=sya.wie.dtype)), ) else: adam_ie = None converged = True if ni.n_trials: r_e = VectorE(allocate_aligned(n_e, dtype=dtype)) r_i = VectorI(allocate_aligned(n_i, dtype=dtype)) recording_mu = allocate_aligned((ni.n_trials * inp.n_stimuli**ng.n_d, 4, 2), np.NaN, dtype=dtype) all_t = allocate_aligned((ni.n_trials, inp.n_stimuli ** ng.n_d), -1, dtype=np.int32) all_di = allocate_aligned((1, 2), dtype=dtype) last_n = 10 inh_in_buffer = allocate_aligned((inp.n_stimuli, inp.n_stimuli, inp.n_stimuli, n_e, last_n), dtype=dtype) for m in range(m_phases): print(f"Phase {m+1} of {m_phases}") mu_idx = ni.every_n * (inp.n_stimuli ** ng.n_d) if DIAGNOSTIC_RATE_PLOT: # and not converged: import matplotlib.pyplot as plt fig = plt.figure() gs = GridSpec(2, 2) ax = fig.add_subplot(gs[0, 0]) ax.plot(recording_re.T) ax = fig.add_subplot(gs[0, 1]) last_idx = np.argwhere(np.isfinite(recording_re[0, :]))[-1] ax.hist(np.squeeze(recording_re[:, last_idx]), bins=100) ax = fig.add_subplot(gs[1, 0]) ax.plot(recording_ri.T) ax = fig.add_subplot(gs[1, 1]) ax.hist(np.squeeze(recording_ri[:, last_idx]), bins=100) plt.show() converged, n_run, ni.max_steps, all_di = train_network( n_trials=ni.every_n, rho0=pl.rho0, re=r_e, ri=r_i, sya=sya, eta_e=pl.eta_e, eta_i=pl.eta_i, wie_decay=pl.wie_decay, wei_decay=pl.wei_decay, plasticity_type_ie=pl.plasticity_type_ie, plasticity_type_ei=pl.plasticity_type_ei, bp_weights=pl.bp_weights, afferents=aff_arrays.afferents, bg_input_inh=inp.inh.bg_input, inh_in=inh_in_buffer, trial_t=all_t[m * ni.every_n: (m + 1) * ni.every_n, :], dt_tau_e=dt_tau_e, dt_tau_i=dt_tau_i, dt_bcm_tau_inv=ni.dt * pl.bcm.tau_inv, r_max=params.ng.r_max, max_dr_dt_exc=max_dr_dt_exc, max_dr_dt_inh=max_dr_dt_inh, convergence_max=pl.convergence_max, convergence_mean=pl.convergence_mean, x_locations=aff_arrays.locations_idx[0], y_locations=aff_arrays.locations_idx[1], z_locations=aff_arrays.locations_idx[2], rec_mu=recording_mu[m * mu_idx:(m + 1) * mu_idx, ...], rec_re=recording_re, rec_ri=recording_ri, max_steps=ni.max_steps, do_abort=ni.do_abort, increment_steps_on_non_convergence=0, bcm_theta=pl.bcm.theta, adam_ie=adam_ie, adam_ei=adam_ei, angles_ie=angles_ie[m * mu_idx:(m + 1) * mu_idx] if angles_ie is not None else None, angles_ei=angles_ei[m * mu_idx:(m + 1) * mu_idx] if angles_ei is not None else None, compute_angles=pl.compute_gradient_angles, ) responses_exc, responses_inh, exc_in, inh_in = estimate_responses( inp.n_stimuli, aff_arrays.locations_idx, aff_arrays.afferents, inp.inh.bg_input, n_e, n_i, pl.rho0, 
sya, dt_tau_e, dt_tau_i, params.ng.r_max, max_dr_dt_exc, max_dr_dt_inh, ni.max_steps, recording_re, recording_ri ) flattened = np.zeros((n_e, inp.n_stimuli ** ng.n_d), dtype=dtype) for n in range(n_e): flattened[n, :] = responses_exc[..., n].flatten() corr = np.corrcoef(flattened) correlations_ee[..., m+1] = corr else: recording_mu = np.zeros((1, 4, 2), dtype=dtype) all_t = np.zeros((1, inp.n_stimuli ** ng.n_d), dtype=np.int32) all_di = np.zeros((1, 2), dtype=dtype) if not np.isfinite(sya.wie).all(): print("wie had NaN or inf values") if not np.isfinite(sya.wei).all(): print("wei had NaN or inf values") if DIAGNOSTIC_RATE_PLOT: # and not converged: import matplotlib.pyplot as plt fig = plt.figure() gs = GridSpec(2, 2) ax = fig.add_subplot(gs[0, 0]) ax.plot(recording_re.T) ax = fig.add_subplot(gs[0, 1]) m = np.argwhere(np.isfinite(recording_re[0, :]))[-1] ax.hist(np.squeeze(recording_re[:, m]), bins=100) ax = fig.add_subplot(gs[1, 0]) ax.plot(recording_ri.T) ax = fig.add_subplot(gs[1, 1]) ax.hist(np.squeeze(recording_ri[:, m]), bins=100) plt.show() osi_e, osi_i = orientation_selectivity_index(inp, responses_exc, responses_inh, aff_arrays) response_sim_ee = compute_response_similarity(responses_exc, responses_exc) response_sim = compute_response_similarity(responses_exc, responses_inh) cc, cp = compute_syn_current_spearmanr(exc_in, inh_in) print(f"Avg correlation between synaptic currents: {np.nanmean(cc):.1f}") print(f"Percentage of cells without strong correlation: {100 * np.nanmean(cp > 1e-3, axis=0):.1f}") if PLOT_RESP_SIMILARITY: import matplotlib.pyplot as plt fig = plt.figure() gs = GridSpec(2, 2) response_sim_ee[np.diag_indices_from(response_sim_ee)] = 0 ax = fig.add_subplot(gs[0, 0]) ax.set_title("corr_ee") ax.imshow(correlations_ee[..., 0]) ax.set_xticks([]) ax.set_yticks([]) ax = fig.add_subplot(gs[0, 1]) ax.set_title("wee") ax.imshow(sya.wee) ax.set_xticks([]) ax.set_yticks([]) ax = fig.add_subplot(gs[1, 0]) ax.set_title("response_sim") ax.imshow(response_sim_ee) ax.set_xticks([]) ax.set_yticks([]) if params.sy.e2e.w_total > dtype(0): ax = fig.add_subplot(gs[1, 1]) ax.set_title("wee - corr_ee") _wee = sya.wee - np.min(sya.wee) _wee /= _wee.max() ax.set_xticks([]) ax.set_yticks([]) ax.imshow(_wee - correlations_ee[..., 0]) plt.show() print("Finished computing responses") t1 = time() t_compute = (t1 - t0) / 60.0 print(f"Compute time {t_compute:.1f} min.") raw_data = dict( converged=converged, steps_to_converge=all_t, max_inh_syn_change=all_di, recording_re=recording_re, recording_ri=recording_ri, recording_mu=recording_mu, angles_ie=angles_ie if angles_ie is not None else np.zeros(1), angles_ei=angles_ei if angles_ei is not None else np.zeros(1), ) computed = dict( responses_exc=responses_exc, responses_inh=responses_inh, response_sim=response_sim, response_sim_ee=response_sim_ee, cc=cc, cp=cp, exc_in=exc_in, inh_in=inh_in, osi_e=osi_e, osi_i=osi_i, correlations_ee=correlations_ee, ) sim_state = dict( wee=sya.wee, wei=sya.wei, wie=sya.wie, wii=sya.wii, wei_init=wei_init, wie_init=wie_init, zei=sya.zei, zie=sya.zie, stimulus_pref=aff_arrays.stimulus_pref, afferents=aff_arrays.afferents, ei_min=sya.ei_min, ie_min=sya.ie_min, ) results: SimResults = dict(raw_data=raw_data, computed=computed, sim_state=sim_state) return results
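The simulation above repeatedly flattens the per-neuron stimulus responses, computes the excitatory correlation matrix with np.corrcoef, and (for the similarity plot) zeroes the diagonal so the trivial self-correlation does not dominate the image. A small sketch of that step, assuming responses is an array whose last axis indexes neurons:

# Hedged sketch of the response-correlation step; responses is assumed to be
# an (n_stim, n_stim, n_stim, n_e) array of firing rates.
import numpy as np

def response_correlations(responses):
    n_e = responses.shape[-1]
    flattened = responses.reshape(-1, n_e).T    # one row per neuron
    corr = np.corrcoef(flattened)               # (n_e, n_e) correlation matrix
    corr[np.diag_indices_from(corr)] = 0.0      # hide the trivial self-correlation
    return corr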
def test_brockwell_davis_ex533(): # See Brockwell and Davis (2009) - Time Series Theory and Methods # Example 5.3.3: ARMA(1, 1) process, p.g. 177 nobs = 10 ar_params = np.array([0.2]) ma_params = np.array([0.4]) sigma2 = 8.92 p = len(ar_params) q = len(ma_params) m = max(p, q) ar = np.r_[1, -ar_params] ma = np.r_[1, ma_params] # First, get the autocovariance of the process arma_process_acovf = arma_acovf(ar, ma, nobs=nobs, sigma2=sigma2) unconditional_variance = ( sigma2 * (1 + 2 * ar_params[0] * ma_params[0] + ma_params[0]**2) / (1 - ar_params[0]**2)) assert_allclose(arma_process_acovf[0], unconditional_variance) # Next, get the autocovariance of the transformed process # Note: as required by {{prefix}}arma_transformed_acovf, we first divide # through by sigma^2 arma_process_acovf /= sigma2 unconditional_variance /= sigma2 out = np.array( _arma_innovations.darma_transformed_acovf_fast(ar, ma, arma_process_acovf)) acovf = np.array(out[0]) acovf2 = np.array(out[1]) # `acovf` is an m^2 x m^2 matrix, where m = max(p, q) # but it is only valid for the autocovariances of the first m observations # (this means in particular that the block `acovf[m:, m:]` should *not* be # used) # `acovf2` then contains the (time invariant) autocovariance terms for # the observations m + 1, ..., nobs - since the autocovariance is the same # for these terms, to save space we do not construct the autocovariance # matrix as we did for the first m terms. Thus `acovf2[0]` is the variance, # `acovf2[1]` is the first autocovariance, etc. # Test the autocovariance function for observations m + 1, ..., nobs # (it is time invariant here) assert_equal(acovf2.shape, (nobs - m, )) assert_allclose(acovf2[0], 1 + ma_params[0]**2) assert_allclose(acovf2[1], ma_params[0]) assert_allclose(acovf2[2:], 0) # Test the autocovariance function for observations 1, ..., m # (it is time varying here) assert_equal(acovf.shape, (m * 2, m * 2)) # (we need to check `acovf[:m * 2, :m]`, i.e. `acovf[:2, :1])` ix = np.diag_indices_from(acovf) ix_lower = (ix[0][:-1] + 1, ix[1][:-1]) # acovf[ix] is the diagonal, and we want to check the first m # elements of the diagonal assert_allclose(acovf[ix][:m], unconditional_variance) # acovf[ix_lower] is the first lower off-diagonal assert_allclose(acovf[ix_lower][:m], ma_params[0]) # Now, check that we compute the moving average coefficients and the # associated variances correctly out = _arma_innovations.darma_innovations_algo_fast( nobs, ar_params, ma_params, acovf, acovf2) theta = np.array(out[0]) v = np.array(out[1]) # Test v (see eq. 5.3.13) desired_v = np.zeros(nobs) desired_v[0] = unconditional_variance for i in range(1, nobs): desired_v[i] = 1 + (1 - 1 / desired_v[i - 1]) * ma_params[0]**2 assert_allclose(v, desired_v) # Test theta (see eq. 
5.3.13) # Note that they will have shape (nobs, m + 1) here, not (nobs, nobs - 1) # as in the original (non-fast) version assert_equal(theta.shape, (nobs, m + 1)) desired_theta = np.zeros(nobs) for i in range(1, nobs): desired_theta[i] = ma_params[0] / desired_v[i - 1] assert_allclose(theta[:, 0], desired_theta) assert_allclose(theta[:, 1:], 0) # Test against Table 5.3.1 endog = np.array([ -1.1, 0.514, 0.116, -0.845, 0.872, -0.467, -0.977, -1.699, -1.228, -1.093 ]) u = _arma_innovations.darma_innovations_filter(endog, ar_params, ma_params, theta) # Note: Table 5.3.1 has \hat X_n+1 = -0.5340 for n = 1, but this seems to # be a typo, since equation 5.3.12 gives the form of the prediction # equation as \hat X_n+1 = \phi X_n + \theta_n1 (X_n - \hat X_n) # Then for n = 1 we have: # \hat X_n+1 = 0.2 (-1.1) + (0.2909) (-1.1 - 0) = -0.5399 # And for n = 2 if we use what we have computed, then we get: # \hat X_n+1 = 0.2 (0.514) + (0.3833) (0.514 - (-0.54)) = 0.5068 # as desired, whereas if we used the book's number for n=1 we would get: # \hat X_n+1 = 0.2 (0.514) + (0.3833) (0.514 - (-0.534)) = 0.5045 # which is not what Table 5.3.1 shows. desired_hat = np.array([ 0, -0.540, 0.5068, -0.1321, -0.4539, 0.7046, -0.5620, -0.3614, -0.8748, -0.3869 ]) desired_u = endog - desired_hat assert_allclose(u, desired_u, atol=1e-4)
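The test above reads the main diagonal and the first sub-diagonal of the transformed autocovariance matrix by shifting the row indices returned by np.diag_indices_from. A toy illustration of that indexing trick on a small matrix:

# Shifting diag_indices_from to read off-diagonals, on a 4x4 example.
import numpy as np

A = np.arange(16).reshape(4, 4)
ix = np.diag_indices_from(A)
ix_lower = (ix[0][:-1] + 1, ix[1][:-1])   # first lower off-diagonal
print(A[ix])        # [ 0  5 10 15]
print(A[ix_lower])  # [ 4  9 14]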
filenames.sort()
fig, ax = plt.subplots(figsize=(12, 12))
for filename in filenames:
    lengths = io.load_lengths(filename.replace(".matrix", ".bed"))
    counts = io.load_counts(filename, lengths=lengths)
    if "25kb" in filename:
        resolution = 25000
    elif "20000" in filename:
        resolution = 20000
    else:
        resolution = 10000

    counts = counts.toarray()
    counts = counts.T + counts
    # Just making sure there is no interaction counted on the diagonal
    counts[np.diag_indices_from(counts)] = 0
    counts = filter.filter_low_counts(counts, percentage=0.03, sparsity=False)
    counts = normalization.ICE_normalization(counts)

    print("1. Compute count vs genomic distance relationship")
    mapping = get_mapping(counts, lengths, verbose=True, smoothed=False)
    ax.plot(mapping[0, 2:] * resolution, mapping[1, 2:])
    ax.axhline(mapping[1, 0])
    ax.set_yscale("log")
    ax.set_xscale("log")
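The Hi-C loop above symmetrizes the raw contact counts and zeroes the diagonal before filtering and ICE normalization (both provided by the iced package). A NumPy-only sketch of the symmetrize-and-zero step:

# Symmetrize a raw contact map and drop self-interactions; filtering and ICE
# normalization are left to the iced package as in the loop above.
import numpy as np

def symmetrize_no_diag(counts):
    counts = counts.T + counts                  # make the matrix symmetric
    counts[np.diag_indices_from(counts)] = 0    # no interaction counted on the diagonal
    return counts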
def WishartBartlett(name, S, nu, is_cholesky=False, return_cholesky=False, testval=None):
    R"""
    Bartlett decomposition of the Wishart distribution. As the Wishart
    distribution requires the matrix to be symmetric positive semi-definite
    it is impossible for MCMC to ever propose acceptable matrices.

    Instead, we can use the Bartlett decomposition which samples a lower
    triangular matrix. Specifically:

    .. math::
        \text{If } A \sim \begin{pmatrix}
        \sqrt{c_1} & 0 & 0 \\
        z_{21} & \sqrt{c_2} & 0 \\
        z_{31} & z_{32} & \sqrt{c_3}
        \end{pmatrix}
        \text{ with } c_i \sim \chi^2(n-i+1) \text{ and } z_{ij} \sim \mathcal{N}(0, 1), \text{ then} \\
        L \times A \times A^T \times L^T \sim \text{Wishart}(L \times L^T, \nu)

    See http://en.wikipedia.org/wiki/Wishart_distribution#Bartlett_decomposition
    for more information.

    Parameters
    ----------
    S : ndarray
        p x p positive definite matrix
        Or:
        p x p lower-triangular matrix that is the Cholesky factor
        of the covariance matrix.
    nu : int
        Degrees of freedom, > dim(S).
    is_cholesky : bool (default=False)
        Input matrix S is already Cholesky decomposed as S.T * S
    return_cholesky : bool (default=False)
        Only return the Cholesky decomposed matrix.
    testval : ndarray
        p x p positive definite matrix used to initialize

    Note
    ----
    This is not a standard Distribution class but follows a similar
    interface. Besides the Wishart distribution, it will add RVs
    c and z to your model which make up the matrix.
    """
    L = S if is_cholesky else scipy.linalg.cholesky(S)

    diag_idx = np.diag_indices_from(S)
    tril_idx = np.tril_indices_from(S, k=-1)
    n_diag = len(diag_idx[0])
    n_tril = len(tril_idx[0])

    if testval is not None:
        # Inverse transform
        testval = np.dot(np.dot(np.linalg.inv(L), testval), np.linalg.inv(L.T))
        testval = scipy.linalg.cholesky(testval, lower=True)
        diag_testval = testval[diag_idx]**2
        tril_testval = testval[tril_idx]
    else:
        diag_testval = None
        tril_testval = None

    c = tt.sqrt(ChiSquared('c', nu - np.arange(2, 2 + n_diag), shape=n_diag,
                           testval=diag_testval))
    pm._log.info('Added new variable c to model diagonal of Wishart.')
    z = Normal('z', 0, 1, shape=n_tril, testval=tril_testval)
    pm._log.info('Added new variable z to model off-diagonals of Wishart.')

    # Construct A matrix
    A = tt.zeros(S.shape, dtype=np.float32)
    A = tt.set_subtensor(A[diag_idx], c)
    A = tt.set_subtensor(A[tril_idx], z)

    # L * A * A.T * L.T ~ Wishart(L*L.T, nu)
    if return_cholesky:
        return Deterministic(name, tt.dot(L, A))
    else:
        return Deterministic(name, tt.dot(tt.dot(tt.dot(L, A), A.T), L.T))
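The Bartlett construction above places square roots of chi-squared draws on the diagonal and standard normal draws below it, then maps the result through the Cholesky factor of the scale matrix. A plain-NumPy sketch of a single Bartlett draw, without PyMC3/Theano, following the degrees of freedom given in the docstring; L is assumed to be the lower Cholesky factor of S and nu > p:

# One Bartlett draw from Wishart(L L^T, nu), assuming L lower-triangular.
import numpy as np

def bartlett_sample(L, nu, rng=None):
    rng = np.random.default_rng() if rng is None else rng
    p = L.shape[0]
    A = np.zeros((p, p))
    diag_idx = np.diag_indices_from(A)
    tril_idx = np.tril_indices_from(A, k=-1)
    A[diag_idx] = np.sqrt(rng.chisquare(nu - np.arange(p)))  # sqrt(c_i), c_i ~ chi2(nu - i + 1)
    A[tril_idx] = rng.standard_normal(len(tril_idx[0]))      # z_ij ~ N(0, 1)
    LA = L @ A
    return LA @ LA.T                                         # ~ Wishart(L L^T, nu)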
def log_marginal_likeli(self, theta=None, eval_gradient=False, clone_kernel=True): """Returns log-marginal likelihood of theta for training data. Parameters ---------- theta : array-like of shape (n_kernel_params,) default=None Kernel hyperparameters for which the log-marginal likelihood is evaluated. If None, the precomputed log_marginal_likelihood of ``self.kernel_.theta`` is returned. eval_gradient : bool, default=False If True, the gradient of the log-marginal likelihood with respect to the kernel hyperparameters at position theta is returned additionally. If True, theta must not be None. clone_kernel : bool, default=True If True, the kernel attribute is copied. If False, the kernel attribute is modified, but may result in a performance improvement. Returns ------- log_likelihood : float Log-marginal likelihood of theta for training data. log_likelihood_gradient : ndarray of shape (n_kernel_params,), optional Gradient of the log-marginal likelihood with respect to the kernel hyperparameters at position theta. Only returned when eval_gradient is True. """ if theta is None: if eval_gradient: raise ValueError( "Gradient can only be evaluated for theta!=None") return self.log_marginal_likelihood_value_ if clone_kernel: kernel = self.kernel_.clone_with_theta(theta) else: kernel = self.kernel_ kernel.theta = theta if eval_gradient: K, K_gradient = kernel(self.X_train_, eval_gradient=True) else: K = kernel(self.X_train_) K[np.diag_indices_from(K)] += self.alpha try: L = cholesky(K, lower=True) # Line 2 except np.linalg.LinAlgError: return (-np.inf, np.zeros_like(theta)) \ if eval_gradient else -np.inf # Support multi-dimensional output of self.y_train_ y_train = self.y_train_ if y_train.ndim == 1: y_train = y_train[:, np.newaxis] alpha = cho_solve((L, True), y_train) # Line 3 # Compute log-likelihood (compare line 7) log_likelihood_dims = -0.5 * np.einsum("ik,ik->k", y_train, alpha) log_likelihood_dims -= np.log(np.diag(L)).sum() log_likelihood_dims -= K.shape[0] / 2 * np.log(2 * np.pi) log_likelihood = log_likelihood_dims.sum(-1) # sum over dimensions # Precompute quantities required for predictions which are independent # of actual query points K = self.kernel_(self.X_train_) K[np.diag_indices_from(K)] += self.alpha try: self.L_ = cholesky(K, lower=True) # Line 2 # self.L_ changed, self._K_inv needs to be recomputed self._K_inv = None except np.linalg.LinAlgError as exc: exc.args = ("The kernel, %s, is not returning a " "positive definite matrix. Try gradually " "increasing the 'alpha' parameter of your " "GaussianProcessRegressor estimator." % self.kernel_, ) + exc.args raise self.alpha_ = cho_solve((self.L_, True), self.y_train_) # Line 3 pred1 = self.predict(uX1) pred2 = self.predict(uX2) phyloss = density_diff(density(pred1), density(pred2)) #print("phyLoss:", 500*phyloss) log_likelihood -= 500 * phyloss #print(log_likelihood) if eval_gradient: # compare Equation 5.9 from GPML tmp = np.einsum("ik,jk->ijk", alpha, alpha) # k: output-dimension tmp -= cho_solve((L, True), np.eye(K.shape[0]))[:, :, np.newaxis] # Compute "0.5 * trace(tmp.dot(K_gradient))" without # constructing the full matrix tmp.dot(K_gradient) since only # its diagonal is required log_likelihood_gradient_dims = \ 0.5 * np.einsum("ijl,jik->kl", tmp, K_gradient) log_likelihood_gradient = log_likelihood_gradient_dims.sum(-1) if eval_gradient: return log_likelihood, log_likelihood_gradient else: return log_likelihood
def custom_leastsq(obj_fn, jac_fn, x0, f_norm2_tol=1e-6, jac_norm_tol=1e-6, rel_ftol=1e-6, rel_xtol=1e-6, max_iter=100, comm=None, verbosity=0, profiler=None): msg = "" converged = False x = x0 f = obj_fn(x) norm_f = _np.dot(f,f) # _np.linalg.norm(f)**2 half_max_nu = 2**62 #what should this be?? tau = 1e-3 nu = 2 mu = 0 #initialized on 1st iter my_cols_slice = None if comm is not None and comm.Get_rank() != 0: verbosity = 0 #Only print to stdout from root process if not _np.isfinite(norm_f): msg = "Infinite norm of objective function at initial point!" for k in range(max_iter): #outer loop # assume x, f, fnorm hold valid values if len(msg) > 0: break #exit outer loop if an exit-message has been set if norm_f < f_norm2_tol: msg = "Sum of squares is at most %g" % f_norm2_tol converged = True; break if verbosity > 0: print("--- Outer Iter %d: norm_f = %g, mu=%g" % (k,norm_f,mu)) if profiler: profiler.mem_check("custom_leastsq: begin outer iter *before de-alloc*") Jac = None; JTJ = None; JTf = None if profiler: profiler.mem_check("custom_leastsq: begin outer iter") Jac = jac_fn(x) if profiler: profiler.mem_check("custom_leastsq: after jacobian:" + "shape=%s, GB=%.2f" % (str(Jac.shape), Jac.nbytes/(1024.0**3)) ) tm = _time.time() if my_cols_slice is None: my_cols_slice = _mpit.distribute_for_dot(Jac.shape[0], comm) JTJ = _mpit.mpidot(Jac.T,Jac,my_cols_slice,comm) #_np.dot(Jac.T,Jac) JTf = _np.dot(Jac.T,f) if profiler: profiler.add_time("custom_leastsq: dotprods",tm) idiag = _np.diag_indices_from(JTJ) norm_JTf = _np.linalg.norm(JTf,ord=_np.inf) norm_x = _np.dot(x,x) # _np.linalg.norm(x)**2 undampled_JTJ_diag = JTJ.diagonal().copy() if norm_JTf < jac_norm_tol: msg = "norm(jacobian) is at most %g" % jac_norm_tol converged = True; break if k == 0: #mu = tau # initial damping element mu = tau * _np.max(undampled_JTJ_diag) # initial damping element #determing increment using adaptive damping while True: #inner loop if profiler: profiler.mem_check("custom_leastsq: begin inner iter") JTJ[idiag] += mu # augment normal equations #JTJ[idiag] *= (1.0 + mu) # augment normal equations try: if profiler: profiler.mem_check("custom_leastsq: before linsolve") tm = _time.time() success = True dx = _np.linalg.solve(JTJ, -JTf) if profiler: profiler.add_time("custom_leastsq: linsolve",tm) except _np.linalg.LinAlgError: success = False if profiler: profiler.mem_check("custom_leastsq: after linsolve") if success: #linear solve succeeded new_x = x + dx norm_dx = _np.dot(dx,dx) # _np.linalg.norm(dx)**2 if verbosity > 1: print(" - Inner Loop: mu=%g, norm_dx=%g" % (mu,norm_dx)) if norm_dx < (rel_xtol**2)*norm_x: msg = "Relative change in |x| is at most %g" % rel_xtol converged = True; break if norm_dx > (norm_x+rel_xtol)/(MACH_PRECISION**2): msg = "(near-)singular linear system"; break new_f = obj_fn(new_x) if profiler: profiler.mem_check("custom_leastsq: after obj_fn") norm_new_f = _np.dot(new_f,new_f) # _np.linalg.norm(new_f)**2 if not _np.isfinite(norm_new_f): # avoid infinite loop... 
msg = "Infinite norm of objective function!"; break dL = _np.dot(dx, mu*dx - JTf) # expected decrease in ||F||^2 from linear model dF = norm_f - norm_new_f # actual decrease in ||F||^2 if verbosity > 1: print(" (cont): norm_new_f=%g, dL=%g, dF=%g, reldL=%g, reldF=%g" % (norm_new_f,dL,dF,dL/norm_f,dF/norm_f)) if dL/norm_f < rel_ftol and dF/norm_f < rel_ftol and dF/dL < 2.0: msg = "Both actual and predicted relative reductions in the" + \ " sum of squares are at most %g" % rel_ftol converged = True; break if profiler: profiler.mem_check("custom_leastsq: before success") if dL > 0 and dF > 0: # reduction in error: increment accepted! t = 1.0 - (2*dF/dL-1.0)**3 # dF/dL == gain ratio mu *= max(t,1.0/3.0) nu = 2 x,f, norm_f = new_x, new_f, norm_new_f if verbosity > 1: print(" Accepted! gain ratio=%g mu * %g => %g" % (dF/dL,max(t,1.0/3.0),mu)) ##Check to see if we *would* switch to Q-N method in a hybrid algorithm #new_Jac = jac_fn(new_x) #new_JTf = _np.dot(new_Jac.T,new_f) #print(" CHECK: %g < %g ?" % (_np.linalg.norm(new_JTf, # ord=_np.inf),0.02 * _np.linalg.norm(new_f))) break # exit inner loop normally #else: # print("LinSolve Failure!!") # if this point is reached, either the linear solve failed # or the error did not reduce. In either case, reject increment. #Increase damping (mu), then increase damping factor to # accelerate further damping increases. mu *= nu if nu > half_max_nu : #watch for nu getting too large (&overflow) msg = "Stopping after nu overflow!"; break nu = 2*nu if verbosity > 1: print(" Rejected! mu => mu*nu = %g, nu => 2*nu = %g" % (mu, nu)) JTJ[idiag] = undampled_JTJ_diag #restore diagonal #end of inner loop #end of outer loop else: #if no break stmt hit, then we've exceeded maxIter msg = "Maximum iterations (%d) exceeded" % max_iter #JTJ[idiag] = undampled_JTJ_diag #restore diagonal return x, converged, msg
def test_brockwell_davis_ex534(): # See Brockwell and Davis (2009) - Time Series Theory and Methods # Example 5.3.4: ARMA(1, 1) process, p.g. 178 nobs = 10 ar_params = np.array([1, -0.24]) ma_params = np.array([0.4, 0.2, 0.1]) sigma2 = 1 p = len(ar_params) q = len(ma_params) m = max(p, q) ar = np.r_[1, -ar_params] ma = np.r_[1, ma_params] # First, get the autocovariance of the process arma_process_acovf = arma_acovf(ar, ma, nobs=nobs, sigma2=sigma2) assert_allclose(arma_process_acovf[:3], [7.17133, 6.44139, 5.06027], atol=1e-5) # Next, get the autocovariance of the transformed process out = np.array( _arma_innovations.darma_transformed_acovf_fast(ar, ma, arma_process_acovf)) acovf = np.array(out[0]) acovf2 = np.array(out[1]) # See test_brockwell_davis_ex533 for details on acovf vs acovf2 # Test acovf assert_equal(acovf.shape, (m * 2, m * 2)) ix = np.diag_indices_from(acovf) ix_lower1 = (ix[0][:-1] + 1, ix[1][:-1]) ix_lower2 = (ix[0][:-2] + 2, ix[1][:-2]) ix_lower3 = (ix[0][:-3] + 3, ix[1][:-3]) ix_lower4 = (ix[0][:-4] + 4, ix[1][:-4]) assert_allclose(acovf[ix][:m], 7.17133, atol=1e-5) desired = [6.44139, 6.44139, 0.816] assert_allclose(acovf[ix_lower1][:m], desired, atol=1e-5) assert_allclose(acovf[ix_lower2][0], 5.06027, atol=1e-5) assert_allclose(acovf[ix_lower2][1:m], 0.34, atol=1e-5) assert_allclose(acovf[ix_lower3][:m], 0.1, atol=1e-5) assert_allclose(acovf[ix_lower4][:m], 0, atol=1e-5) # Test acovf2 assert_equal(acovf2.shape, (nobs - m, )) assert_allclose(acovf2[:4], [1.21, 0.5, 0.24, 0.1]) assert_allclose(acovf2[4:], 0) # Test innovations algorithm output out = _arma_innovations.darma_innovations_algo_fast( nobs, ar_params, ma_params, acovf, acovf2) theta = np.array(out[0]) v = np.array(out[1]) # Test v (see Table 5.3.2) desired_v = [ 7.1713, 1.3856, 1.0057, 1.0019, 1.0016, 1.0005, 1.0000, 1.0000, 1.0000, 1.0000 ] assert_allclose(v, desired_v, atol=1e-4) # Test theta (see Table 5.3.2) assert_equal(theta.shape, (nobs, m + 1)) desired_theta = np.array([[ 0, 0.8982, 1.3685, 0.4008, 0.3998, 0.3992, 0.4000, 0.4000, 0.4000, 0.4000 ], [0, 0, 0.7056, 0.1806, 0.2020, 0.1995, 0.1997, 0.2000, 0.2000, 0.2000 ], [0, 0, 0, 0.0139, 0.0722, 0.0994, 0.0998, 0.0998, 0.0999, 0.1]]).T assert_allclose(theta[:, :m], desired_theta, atol=1e-4) assert_allclose(theta[:, m:], 0) # Test innovations filter output endog = np.array([ 1.704, 0.527, 1.041, 0.942, 0.555, -1.002, -0.585, 0.010, -0.638, 0.525 ]) u = _arma_innovations.darma_innovations_filter(endog, ar_params, ma_params, theta) desired_hat = np.array([ 0, 1.5305, -0.1710, 1.2428, 0.7443, 0.3138, -1.7293, -0.1688, 0.3193, -0.8731 ]) desired_u = endog - desired_hat assert_allclose(u, desired_u, atol=1e-4)
def fit(self, X, y): """Fit Gaussian process regression model. Parameters ---------- X : array-like, shape = (n_samples, n_features) Training data y : array-like, shape = (n_samples, [n_output_dims]) Target values Returns ------- self : returns an instance of self. """ if self.kernel is None: # Use an RBF kernel as default self.kernel_ = C() * RBF() + WhiteKernel() else: self.kernel_ = clone(self.kernel) # Fix the Covariance matrix if self.x_cov is None: self.x_cov = 0.0 self.propagate_error = False if isinstance(self.x_cov, float): self.x_cov = np.array([self.x_cov]) if np.ndim(self.x_cov) < 2: self.x_cov = np.diag(self.x_cov) self.x_cov = self.x_cov self._rng = check_random_state(self.random_state) X, y = check_X_y(X, y, multi_output=True, y_numeric=True) # Normalize target value if self.normalize_y: self._y_train_mean = np.mean(y, axis=0) # demean y y = y - self._y_train_mean else: self._y_train_mean = np.zeros(1) self.X_train_ = X self.y_train_ = y #====================================== # Step I: Marginal Maximum Likelihood # w/o Derivative of the kernel #====================================== # Choose hyperparameters based on the log-marginal # likelihood self.derivative_term = None optima = self._constrained_optimization( self._obj_func, self.kernel_.theta, self.kernel_.bounds) # extract optimum parameters self.kernel_.theta = optima[0] self.log_marginal_likelihood_value_ = - optima[1] #====================================== # Step II: Solve for Weights #====================================== K = self.kernel_(self.X_train_) K[np.diag_indices_from(K)] += self.alpha try: self.L_ = cholesky(K, lower=True) except np.linalg.LinAlgError as exc: exc.args(f"The kernel {self.kernel_}, is not returing a " "positive definite matrix. Try gradually " "increasing the 'alpha' parameter of your GPR.") + exc.args raise self.alpha_ = cho_solve((self.L_, True), self.y_train_) #====================================== # Step III: Take Derivative #====================================== # Calculate the Derivative for RBF Kernel self.derivative = rbf_derivative( self.X_train_, self.X_train_, self.kernel_(self.X_train_, self.X_train_), self.alpha_, self.kernel_.get_params()['k1__k2__length_scale'] ) # Calculate the derivative term self.derivative_term = np.dot(self.derivative, np.dot(self.x_cov, self.derivative.T)) #====================================== # Step IV: Maximum Marginal Likelihood # w/ Derivative #====================================== # Choose hyperparameters based on the log-marginal # likelihood optima = self._constrained_optimization( self._obj_func, self.kernel_.theta, self.kernel_.bounds) # extract optimum parameters self.kernel_.theta = optima[0] self.log_marginal_likelihood_value_ = - optima[1] K = self.kernel_(self.X_train_) K[np.diag_indices_from(K)] += self.alpha K += self.derivative_term try: self.L_ = cholesky(K, lower=True) self._K_inv = None except np.linalg.LinAlgError as exc: exc.args(f"The kernel {self.kernel_}, is not returing a " "positive definite matrix. Try gradually " "increasing the 'alpha' parameter of your GPR.") + exc.args raise self.alpha_ = cho_solve((self.L_, True), self.y_train_) #====================================== # Step V: Repeat Steps II-IV until # desired convergence #====================================== # TODO: Complete convergence return self
import cvxpy as cp
import numpy as np

I = np.random.randn(10, 10) > .4
I[np.diag_indices_from(I)] = 0
K = np.shape(I)[0]

X = cp.Variable((K, K), name='X')

const = []
for i in range(K):
    for j in range(K):
        if I[i, j] > 0:
            const.append(X[i, j] == 0)
const.append(cp.diag(X) == 1)

p = cp.Problem(cp.Minimize(cp.normNuc(X)), const)
p.solve(verbose=True)
print(X.value)
# %% [markdown]
# ## Dense mass matrices
#
# The main extra is the :func:`exoplanet.get_dense_nuts_step` function that extends the PyMC3 sampling procedure to include support for learning off-diagonal elements of the mass matrix.
# This is *very* important for any problems where there are covariances between the parameters (this is true for pretty much all exoplanet models).
# A thorough discussion of this [can be found elsewhere online](https://dfm.io/posts/pymc3-mass-matrix/), but here is a simple demo where we sample a covariant Gaussian using :func:`exoplanet.get_dense_nuts_step`.
#
# First, we generate a random positive definite covariance matrix for the Gaussian:

# %%
import numpy as np

ndim = 5
np.random.seed(42)
L = np.random.randn(ndim, ndim)
L[np.diag_indices_from(L)] = 0.1 * np.exp(L[np.diag_indices_from(L)])
L[np.triu_indices_from(L, 1)] = 0.0
cov = np.dot(L, L.T)

# %% [markdown]
# And then we can sample this using PyMC3 and :func:`exoplanet.get_dense_nuts_step`:

# %%
import pymc3 as pm
import exoplanet as xo

with pm.Model() as model:
    pm.MvNormal("x", mu=np.zeros(ndim), chol=L, shape=(ndim, ))
    trace = pm.sample(tune=2000, draws=2000, chains=2,
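# %% [markdown]
# As a quick sanity check on the construction above (assuming the first code cell has been run): exponentiating the diagonal keeps it strictly positive, so `L` is a valid Cholesky factor and `cov = L @ L.T` is positive definite.

# %%
# Every eigenvalue of L @ L.T must be positive when diag(L) > 0.
assert np.all(np.diag(L) > 0)
assert np.all(np.linalg.eigvalsh(cov) > 0)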
def log_marginal_likelihood(self, theta=None, eval_gradient=False, clone_kernel=True): """Return log-marginal likelihood of theta for training data. Parameters ---------- theta : array-like of shape (n_kernel_params,) default=None Kernel hyperparameters for which the log-marginal likelihood is evaluated. If None, the precomputed log_marginal_likelihood of ``self.kernel_.theta`` is returned. eval_gradient : bool, default=False If True, the gradient of the log-marginal likelihood with respect to the kernel hyperparameters at position theta is returned additionally. If True, theta must not be None. clone_kernel : bool, default=True If True, the kernel attribute is copied. If False, the kernel attribute is modified, but may result in a performance improvement. Returns ------- log_likelihood : float Log-marginal likelihood of theta for training data. log_likelihood_gradient : ndarray of shape (n_kernel_params,), optional Gradient of the log-marginal likelihood with respect to the kernel hyperparameters at position theta. Only returned when eval_gradient is True. """ if theta is None: if eval_gradient: raise ValueError( "Gradient can only be evaluated for theta!=None") return self.log_marginal_likelihood_value_ if clone_kernel: kernel = self.kernel_.clone_with_theta(theta) else: kernel = self.kernel_ kernel.theta = theta if eval_gradient: K, K_gradient = kernel(self.X_train_, eval_gradient=True) else: K = kernel(self.X_train_) # Alg. 2.1, page 19, line 2 -> L = cholesky(K + sigma^2 I) K[np.diag_indices_from(K)] += self.alpha try: L = cholesky(K, lower=GPR_CHOLESKY_LOWER, check_finite=False) except np.linalg.LinAlgError: return (-np.inf, np.zeros_like(theta)) if eval_gradient else -np.inf # Support multi-dimensional output of self.y_train_ y_train = self.y_train_ if y_train.ndim == 1: y_train = y_train[:, np.newaxis] # Alg 2.1, page 19, line 3 -> alpha = L^T \ (L \ y) alpha = cho_solve((L, GPR_CHOLESKY_LOWER), y_train, check_finite=False) # Alg 2.1, page 19, line 7 # -0.5 . y^T . alpha - sum(log(diag(L))) - n_samples / 2 log(2*pi) # y is originally thought to be a (1, n_samples) row vector. However, # in multioutputs, y is of shape (n_samples, 2) and we need to compute # y^T . alpha for each output, independently using einsum. Thus, it # is equivalent to: # for output_idx in range(n_outputs): # log_likelihood_dims[output_idx] = ( # y_train[:, [output_idx]] @ alpha[:, [output_idx]] # ) log_likelihood_dims = -0.5 * np.einsum("ik,ik->k", y_train, alpha) log_likelihood_dims -= np.log(np.diag(L)).sum() log_likelihood_dims -= K.shape[0] / 2 * np.log(2 * np.pi) # the log likehood is sum-up across the outputs log_likelihood = log_likelihood_dims.sum(axis=-1) if eval_gradient: # Eq. 5.9, p. 114, and footnote 5 in p. 114 # 0.5 * trace((alpha . alpha^T - K^-1) . K_gradient) # alpha is supposed to be a vector of (n_samples,) elements. With # multioutputs, alpha is a matrix of size (n_samples, n_outputs). 
# Therefore, we want to construct a matrix of # (n_samples, n_samples, n_outputs) equivalent to # for output_idx in range(n_outputs): # output_alpha = alpha[:, [output_idx]] # inner_term[..., output_idx] = output_alpha @ output_alpha.T inner_term = np.einsum("ik,jk->ijk", alpha, alpha) # compute K^-1 of shape (n_samples, n_samples) K_inv = cho_solve((L, GPR_CHOLESKY_LOWER), np.eye(K.shape[0]), check_finite=False) # create a new axis to use broadcasting between inner_term and # K_inv inner_term -= K_inv[..., np.newaxis] # Since we are interested about the trace of # inner_term @ K_gradient, we don't explicitly compute the # matrix-by-matrix operation and instead use an einsum. Therefore # it is equivalent to: # for param_idx in range(n_kernel_params): # for output_idx in range(n_output): # log_likehood_gradient_dims[param_idx, output_idx] = ( # inner_term[..., output_idx] @ # K_gradient[..., param_idx] # ) log_likelihood_gradient_dims = 0.5 * np.einsum( "ijl,jik->kl", inner_term, K_gradient) # the log likehood gradient is the sum-up across the outputs log_likelihood_gradient = log_likelihood_gradient_dims.sum(axis=-1) if eval_gradient: return log_likelihood, log_likelihood_gradient else: return log_likelihood
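The log-marginal-likelihood implementations above share the same core recipe: add the noise term alpha to the kernel diagonal, Cholesky-factorize, and evaluate the Gaussian log marginal likelihood from the factor. A standalone single-output sketch of that recipe; K, y and noise are assumed inputs:

# Gaussian-process log marginal likelihood for one output dimension.
import numpy as np
from scipy.linalg import cholesky, cho_solve

def gp_log_marginal_likelihood(K, y, noise=1e-10):
    K = K.copy()
    K[np.diag_indices_from(K)] += noise   # sigma^2 (or jitter) on the diagonal
    L = cholesky(K, lower=True)           # K = L L^T
    alpha = cho_solve((L, True), y)       # alpha = K^-1 y
    return (-0.5 * y.dot(alpha)
            - np.log(np.diag(L)).sum()
            - 0.5 * K.shape[0] * np.log(2 * np.pi))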
def train_KRR_qml(X, y, sigma=1e3, llambda=1e-8):
    K = compute_kernel_qml(X, X, sigma=sigma)
    K[np.diag_indices_from(K)] += llambda
    alpha = cho_solve(K, y)
    return alpha
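train_KRR_qml only returns the regression coefficients; prediction is a kernel evaluation between test and training representations followed by a dot product. A hedged sketch reusing the helper names from the snippet above (compute_kernel_qml and the trained alpha are assumed to come from it):

# Prediction counterpart to train_KRR_qml; X_test must use the same
# representation as the training set X_train.
def predict_KRR_qml(X_test, X_train, alpha, sigma=1e3):
    Ks = compute_kernel_qml(X_test, X_train, sigma=sigma)
    return np.dot(Ks, alpha)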
def add_data(self, fX, fT, istart=0, icount=np.inf, fHH=None, fHT=None): """Feed new training data (X,T) to HP-ELM model in batches: does not solve ELM itself. This method prepares an intermediate solution data, that takes the most time. After that, obtaining the solution is fast. The intermediate solution consists of two matrices: `HH` and `HT`. They can be in memory for a model computed at once, or stored on disk for a model computed in parts or in parallel. For iterative solution, provide file names for on-disk matrices in the input parameters `fHH` and `fHT`. They will be created if they don't exist, or new results will be merged with the existing ones. This method is multiprocess-safe for parallel writing into files `fHH` and `fHT`, that allows you to easily compute ELM in parallel. The multiprocess-safeness uses Python module 'fasteners' and a lock file, which is named fHH+'.lock' and fHT+'.lock'. Args: fX (hdf5): (part of) input training data size (N * `inputs`) fT (hdf5) (part of) output training data size (N * `outputs`) istart (int, optional): index of first data sample to use from `fX`, `istart` < N. If not given, all data from `fX` is used. Sample with index `istart` is used for training, indexing is 0-based. icount (int, optional): number of data samples to use from `fX`, starting from `istart`, automatically adjusted to `istart` + `icount` <= N. If not given, all data starting from `start` is used. The last sample used for training is `istart`+`icount`-1, so you can index data as: istart_1=0, icount_1=1000; istart_2=1000, icount_2=1000; istart_3=2000, icount_3=1000, ... fHH, fHT (string, optional): file names for storing HH and HT matrices. Files are created if they don't exist, or new result is added to the existing files if they exist. Parallel writing to the same `fHH`, `fHT` files is multiprocess-safe, made specially for parallel training of HP-ELM. Another use is to split a very long training of huge ELM into smaller parts, so the training can be interrupted and resumed later. """ # initialize assert len(self.nnet.neurons) > 0, "Add neurons to ELM before using it" X, T = self._checkdata(fX, fT) N = X.shape[0] _prepare_fHH(fHH, fHT, self.nnet, self.precision) # custom range adjustments icount = min(icount, N - istart) nb = int(np.ceil(float(icount) / self.batch)) # number of batches # weighted classification initialization if self.classification == "wc" and self.wc is None: ns = np.zeros((self.nnet.outputs, )) for b in xrange(nb): # batch sum is much faster start = b * self.batch + istart stop = min((b + 1) * self.batch + istart, icount + istart) ns += T[start:stop].sum(axis=0) ns = ns.astype(self.precision) self.wc = ns.sum( ) / ns # class weights normalized to number of samples # main loop over all the data t = time() t0 = time() wc_vector = None for b in xrange(nb): start = b * self.batch + istart stop = min((b + 1) * self.batch + istart, icount + istart) Xb = X[start:stop] Tb = T[start:stop] if self.classification == "wc": wc_vector = self.wc[np.where( Tb == 1)[1]] # weights for samples in the batch self.nnet.add_batch(Xb, Tb, wc_vector) # report time eta = int(((time() - t0) / (b + 1)) * (nb - b - 1)) if time() - t > self.tprint: print("processing batch %d/%d, eta %d:%02d:%02d" % (b + 1, nb, eta / 3600, (eta % 3600) / 60, eta % 60)) t = time() # if storing output to disk if fHH is not None and fHT is not None: HH, HT = self.nnet.get_corr() HH[np.diag_indices_from( HH)] -= self.nnet.norm # norm is already included _write_fHH(fHH, fHT, HH, HT)
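add_data accumulates the intermediate solution in two matrices, HH (roughly H^T H plus the regularization term `norm` on its diagonal) and HT (H^T T), over batches, and strips the norm term from the diagonal before writing, presumably so that merging partial files does not add it more than once. A NumPy sketch of that bookkeeping, not hpelm's actual internals; `batches` is assumed to yield (H, T) pairs of hidden-layer outputs and targets:

# Batch accumulation of HH and HT with a ridge term on the diagonal.
import numpy as np

def accumulate_batches(batches, n_neurons, n_outputs, norm=1e-9):
    HH = np.zeros((n_neurons, n_neurons))
    HH[np.diag_indices_from(HH)] += norm
    HT = np.zeros((n_neurons, n_outputs))
    for H, T in batches:                # H: (batch, n_neurons), T: (batch, n_outputs)
        HH += H.T.dot(H)
        HT += H.T.dot(T)
    HH_out = HH.copy()
    HH_out[np.diag_indices_from(HH_out)] -= norm   # norm is already included; strip before saving
    return HH_out, HT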