def diffusion_sm_shift_K(N,K,a_bar,a_degger_bar): # matrices to shift above and below diagonal and solve initial equation diag_n_count = diag_count(N) delta = np.eye(N)*a_bar - np.dot(diag_n_count,np.tri(N,N,-1)-np.tri(N,N,-2)) delta_degger = np.eye(N)*a_degger_bar - np.tri(N,N,1)+np.tri(N,N,0) # store generating function coefficients in this matrix: gen = np.zeros((N,K)) # two initial vectors for k2=0 and k2=1 #for n1 in np.arange(N): #gen[n1,0] = 1./2.**(N-1)*np.float(math.factorial(N-1))/(math.factorial(N-1-n1)*math.factorial(n1)) gen[:,0] = np.ones(N) gen[:,1] = np.dot(delta,gen[:,0]) allcond = [] # calculate full matrix gen, i.e. for row n1 and column k2 it contains gen(n1,k2) at steady state for k2 in np.arange(1,K-1): gen[:,k2+1] = la.solve((k2+1)*delta_degger , -np.dot( np.dot(delta_degger,delta) + np.eye(N)*k2 ,gen[:,k2]) - np.dot(delta,gen[:,k2-1])) eigval = la.eig((k2+1)*delta_degger) cond = np.float(np.max(np.abs(eigval[0])))/np.min(np.abs(eigval[0])) # print cond allcond.append(cond) coeff_mat = mat_K(a_bar*a_degger_bar,N,K).T p_mat = np.dot(gen,coeff_mat) p_n1 = np.sum(p_mat, axis=1) # sum of each row n gives value of the marginal distribution p(n) p_n2 = np.sum(p_mat, axis=0) # sum of each column m gives value of the marginal distribution p(m) n1_mean = np.dot(np.arange(N),p_n1) # compare with theoretically expected value: n2_mean = np.dot(np.arange(N),p_n2) #print((n1_mean,n2_mean,np.sum(p_mat))) return (p_mat,allcond)
def test_iradon_angles(): """ Test with different number of projections """ size = 100 # Synthetic data image = np.tri(size) + np.tri(size)[::-1] # Large number of projections: a good quality is expected nb_angles = 200 radon_image_200 = radon(image, theta=np.linspace(0, 180, nb_angles, endpoint=False)) reconstructed = iradon(radon_image_200) delta_200 = np.mean(abs(_rescale_intensity(image) - _rescale_intensity(reconstructed))) assert delta_200 < 0.03 # Lower number of projections nb_angles = 80 radon_image_80 = radon(image, theta=np.linspace(0, 180, nb_angles, endpoint=False)) # Test whether the sum of all projections is approximately the same s = radon_image_80.sum(axis=0) assert np.allclose(s, s[0], rtol=0.01) reconstructed = iradon(radon_image_80) delta_80 = np.mean(abs(image / np.max(image) - reconstructed / np.max(reconstructed))) # Loss of quality when the number of projections is reduced assert delta_80 > delta_200
def test_radon_iradon(): size = 100 debug = False image = np.tri(size) + np.tri(size)[::-1] for filter_type in ["ramp", "shepp-logan", "cosine", "hamming", "hann"]: reconstructed = iradon(radon(image), filter=filter_type) image = rescale(image) reconstructed = rescale(reconstructed) delta = np.mean(np.abs(image - reconstructed)) if debug: print(delta) import matplotlib.pyplot as plt f, (ax1, ax2) = plt.subplots(1, 2) ax1.imshow(image, cmap=plt.cm.gray) ax2.imshow(reconstructed, cmap=plt.cm.gray) plt.show() assert delta < 0.05 reconstructed = iradon(radon(image), filter="ramp", interpolation="nearest") delta = np.mean(abs(image - reconstructed)) assert delta < 0.05 size = 20 image = np.tri(size) + np.tri(size)[::-1] reconstructed = iradon(radon(image), filter="ramp", interpolation="nearest")
def tri_flat(array, UPPER=True): """ Flattens the upper/lower triangle of a square matrix. Parameters ---------- array : np.ndarray square matrix UPPER : boolean Upper or lower triangle to flatten (defaults to upper). If the matrix is symmetric, this parameter is unnecessary. Returns ------- array : np.ndarray vector representation of the upper / lower triangle """ C = array.shape[0] if UPPER: mask = np.asarray(np.invert(np.tri(C,C,dtype=bool)),dtype=float) else: mask = np.asarray(np.invert(np.tri(C,C,dtype=bool).transpose()),dtype=float) x,y = mask.nonzero() return array[x,y]
def test_template(): size = 100 # Float prefactors ensure that image range is between 0 and 1 image = np.full((400, 400), 0.5) target = 0.1 * (np.tri(size) + np.tri(size)[::-1]) target_positions = [(50, 50), (200, 200)] for x, y in target_positions: image[x:x + size, y:y + size] = target np.random.seed(1) image += 0.1 * np.random.uniform(size=(400, 400)) result = match_template(image, target) delta = 5 positions = peak_local_max(result, min_distance=delta) if len(positions) > 2: # Keep the two maximum peaks. intensities = result[tuple(positions.T)] i_maxsort = np.argsort(intensities)[::-1] positions = positions[i_maxsort][:2] # Sort so that order matches `target_positions`. positions = positions[np.argsort(positions[:, 0])] for xy_target, xy in zip(target_positions, positions): assert_almost_equal(xy, xy_target)
def diffusion_recurrence(N): diag = diag_count(N) A1 = np.tri(N,N,1)-np.tri(N,N,0) A2 = np.tri(N,N,2)-np.tri(N,N,1) p_mat_old = np.zeros((N,N)) # two initial vectors: p_mat_old[:,0] = np.zeros(N) p_mat_old[N-1,0] = 1. p_mat_old[:,1] = np.dot(np.dot(A1,diag),p_mat_old[:,0]) # calculate matrix containing the full distribution, i.e. for row n1 and column n2 it contains p(n1,n2) at steady state for n2 in np.arange(2,N): p_mat_old[:,n2] = 1./n2*(-np.dot(np.dot(A2,diag),p_mat_old[:,n2-2]) + (n2-1)*np.dot(A1,p_mat_old[:,n2-1]) + np.dot(np.dot(A1,diag),p_mat_old[:,n2-1])) p_mat_old = p_mat_old/np.sum(p_mat_old) return p_mat_old #p_mat_old = diffusion_recurrence(N) #p_n1_old = np.sum(p_mat_old, axis=1) # sum of each row n gives value of the marginal distribution p(n) #p_n2_old = np.sum(p_mat_old, axis=0) # sum of each column m gives value of the marginal distribution p(m) #n1_mean_old = np.dot(np.arange(N),p_n1_old) # compare with theoretically expected value: #n2_mean_old = np.dot(np.arange(N),p_n2_old)
def crankNicolson(condInitialesPhi, condInitialesPsi, condSpatiales = None, tMax = 0.001, dt=10**-6, v = 1, dx = 1): if np.size(condInitialesPhi) != np.size(condInitialesPsi) : raise Exception("La taille de condInitialesPhi doit être semblable à condInitialesPsi") # Constantes utiles n = np.size(condInitialesPhi) k = -dt * v**2 / dx**2 / 2 N = int(tMax / dt) # Matrice de l’évolution du système evolution = np.zeros((N+1,2*n)) evolution[0,:n] = condInitialesPhi evolution[0,n:] = condInitialesPsi # On créer la matrice d'évolution I = np.eye(n) A = np.tri(n, k = 1).T * np.tri(n, k=-1) A = (A + A.T - 2 * I) * k M = np.array(np.bmat(((I, -dt*I/2),(A, I)))) K = np.array(np.bmat(((I, dt*I/2),(-A, I)))) invM = np.linalg.inv(M) matriceEvolution = np.dot(invM,K) # On applique les conditions spatiales obtenant la liste des points qui varie dans le temps. if condSpatiales is not None : matriceEvolution[condSpatiales] = np.zeros(2*n) matriceEvolution[condSpatiales, condSpatiales] = 1 matriceEvolution[condSpatiales+n] = np.zeros(2*n) for i in range(1,N+1): evolution[i] = np.dot(matriceEvolution,evolution[i-1]) return evolution[:,:n], evolution[:,n:]
def get_potentials_wake(ring, harm_cav, lamb): E0 = ring.E0 C0 = ring.C0 T0 = ring.T0 h = ring.harm_num Rs = harm_cav.shunt_imp alpha = harm_cav.alpha beta = harm_cav.beta z = lamb.z dist = lamb.dist*lamb.cur[:, None] Ll = 1/(1-np.exp(-beta*C0)) # Lesser Gl = Ll*np.exp(-beta*C0) # Greater or Equal A = Ll*np.tri(h, h, -1) + Gl*np.tri(h, h).T ind_b = np.arange(h) dif = ind_b[:, None] - ind_b[None, :] B = np.exp(-beta*C0*dif/h) lamb_w = np.trapz(dist*np.exp(beta*z)[None, :], z) V = np.dot(A*B, lamb_w) Sn = scy_int.cumtrapz(np.exp(beta*z)[None, :] * dist, x=z, initial=0*1j) Vt = np.exp(-beta*z)[None, :] * (V[:, None] + Sn) k = 1 return -T0/E0 * 2*alpha*Rs*(Vt.real - alpha/harm_cav.wrb*Vt.imag), k
def _poisson_exp_dense(X, counts, alpha, bias, beta=None, use_empty_entries=False): m, n = X.shape d = euclidean_distances(X) if use_empty_entries: mask = (np.invert(np.tri(m, dtype=np.bool))) else: mask = np.invert(np.tri(m, dtype=np.bool)) & (counts != 0) & (d != 0) bias = bias.reshape(-1, 1) if beta is None: beta = counts[mask].sum() / ( (d[mask] ** alpha) * (bias * bias.T)[mask]).sum() g = beta * d ** alpha g *= bias * bias.T g = g[mask] ll = counts[mask] * np.log(beta) + \ alpha * counts[mask] * np.log(d[mask]) + \ counts[mask] * np.log(bias * bias.T)[mask] ll -= g # We are trying to maximise, so we need the opposite of the log likelihood if np.isnan(ll.sum()): raise ValueError("Objective function is Not a Number") return - ll.sum()
def test_roberts_diagonal2(): """Roberts' filter on a diagonal edge should be a diagonal line.""" image = np.rot90(np.tri(10, 10, 0), 3) expected = ~np.rot90(np.tri(10, 10, -1).astype(bool) | np.tri(10, 10, -2).astype(bool).transpose()) expected = _mask_filter_result(expected, None) result = F.roberts(image).astype(bool) assert_close(result, expected)
def test_roberts_diagonal1(): """Roberts' filter on a diagonal edge should be a diagonal line.""" image = np.tri(10, 10, 0) expected = ~(np.tri(10, 10, -1).astype(bool) | np.tri(10, 10, -2).astype(bool).transpose()) expected = _mask_filter_result(expected, None) result = filters.roberts(image).astype(bool) assert_array_almost_equal(result, expected)
def test_roberts_diagonal1(): """Roberts' on an edge should be a one diagonal""" image = np.tri(10, 10, 0) expected = ~(np.tri(10, 10, -1).astype(bool) | \ np.tri(10, 10, -2).astype(bool).transpose()) expected = _mask_filter_result(expected,None) result = F.roberts(image).astype(bool) assert_close(result,expected)
def _make_cubic_spline_matrix(nn): ii = np.eye(nn,nn) sub_ii = np.tri(nn,nn,k=-1) - np.tri(nn,nn,k=-2) coeff = 4*ii + sub_ii + sub_ii.transpose() coeff[0][0] = 2 coeff[nn-1][nn-1] = 2 return np.matrix(coeff)
def test_roberts_diagonal2(): """Roberts' on an edge should be a other diagonal""" diagonal = np.tri(10, 10, 0,dtype=int) rev_diagonal = np.rot90(diagonal.transpose(),1) image = (rev_diagonal > 0).astype(float) expected = ~np.rot90((np.tri(10, 10, -1).astype(bool) | \ np.tri(10, 10, -2).astype(bool).transpose()),1) expected = _mask_filter_result(expected,None) result = F.roberts(image).astype(bool) assert_close(result,expected)
def subsequence_ecdf(x, w): n = x.shape[0] return numpy.sum( numpy.where( (1 - numpy.tri(n, k=-1, dtype=bool)).reshape((n, 1, n)).repeat(n, axis=1) & numpy.tri(n, dtype=bool).reshape((1, n, n)).repeat(n, axis=0) & (x.reshape((n, 1)).repeat(n, axis=1) >= x).reshape((1, n, n)).repeat(n, axis=0), w, 0, ), axis=2, ) / subsequence_sums(w)
def triDiag(n): """Retourne une matrice n x n de la forme A = [-2 1 0 0 ...] [1 -2 1 0 ...] [0 1 -2 1 ...] [ .......... ] [... 0 1 -2 1] [... 0 0 1 -2] """ A = np.tri(n, k = 1).T * np.tri(n, k=-1) A = (A + A.T - 2 * np.eye(n)) return A
def test_frt(): SIZE = 59 try: import sympy.ntheory as sn assert sn.isprime(SIZE) == True except ImportError: pass # Generate a test image L = np.tri(SIZE, dtype=np.int32) + np.tri(SIZE, dtype=np.int32)[::-1] f = frt2(L) fi = ifrt2(f) assert len(np.nonzero(L - fi)[0]) == 0
def get_gauss_means_vec(u1, v1, I): n = len(I) aux = np.repeat(u1[:,np.newaxis],n, axis=1) #calculate x*\exp{-\int_s^t g(v)dv} aux[np.tri(n, n, 0)==0] = 1 mu1 = np.cumprod(aux, 0) aux[np.tri(n, n, -1)==0] = 1 y = np.cumprod(aux,0) y[np.tri(n, n, 0)==0] = 0 y = v1*I*y mu2 = np.cumsum(y[:,::-1], 1)[:, ::-1] return mu1, mu2
def vech(X, order='F'): """Returns vectorization of lower triangle of X in column major order by default.""" assert X.ndim == 2, 'vech operator requires a matrix.' m, n = X.shape if order == 'F': idx = np.where(1 - np.tri(n,m, -1, dtype=int)) return X.T[idx] elif order == 'C': i,j = np.where(np.tri(m,n, dtype=int)) else: raise Exception("Only order C and F are allowed") return X[i,j]
def crankNicolson2D(condInitialesPhi, condInitialesPsi, condSpatiales = None, tMax = 0.001, dt=10**-6, v = 100, dx = 1, intervalSauvegarde=1): if not np.array_equal(np.shape(condInitialesPhi), np.shape(condInitialesPsi)): raise Exception("La taille de condInitialesPhi doit être similaire à la taille condInitialesPsi.") # Constantes utiles n = np.shape(condInitialesPhi) #(ny,nx) if n[0] != n[1] : raise Exception("Les dimensions x et y doivent être similaires.") n = n[0] k = -dt * v**2 / dx**2 / 2 N = int(tMax / dt) # Matrice de l’évolution du système evolution = np.zeros((int(N/intervalSauvegarde)+1,2*n*n)) evolution[0,:n*n] = condInitialesPhi.flatten() evolution[0,n*n:] = condInitialesPsi.flatten() phi = evolution[0] I = np.eye(n*n) A = np.tri(n*n, k = 1).T * np.tri(n*n, k=-1) A = (A + A.T - 4 * np.eye(n*n))*k B = np.eye((n-1)*n)*k A[:-n,n:] = A[:-n,n:] + B A[n:,:-n] = A[n:,:-n] + B M = np.array(np.bmat( ((I, -dt*I/2), (A, I)))) K = np.array(np.bmat( ((I, dt*I/2), (-A, I)))) invM = np.linalg.inv(M) matriceEvolution = np.dot(invM,K) # On applique les conditions spatiales obtenant la liste des points qui varie dans le temps. if condSpatiales is not None : idx = np.array(condSpatiales[0]+n*condSpatiales[1],"int") matriceEvolution[idx] = np.zeros(2*n*n) matriceEvolution[idx, idx] = 1 matriceEvolution[idx+n*n] = np.zeros(2*n*n) for i in range(1,N+1): phi = np.dot(matriceEvolution,phi) if i % intervalSauvegarde == 0: evolution[int(i // intervalSauvegarde)] = phi return evolution[:,:n*n], evolution[:,n*n:]
def psi(self, x, y): # x is unaries # y is a labeling ## unary features: gx, gy = np.ogrid[:x.shape[0], :x.shape[1]] selected_unaries = x[gx, gy, y] unaries_acc = np.sum(x[gx, gy, y]) unaries_acc = np.bincount(y.ravel(), selected_unaries.ravel(), minlength=self.n_states) ##accumulated pairwise #make one hot encoding labels = np.zeros((y.shape[0], y.shape[1], self.n_states), dtype=np.int) gx, gy = np.ogrid[:y.shape[0], :y.shape[1]] labels[gx, gy, y] = 1 # vertical edges vert = np.dot(labels[1:, :, :].reshape(-1, self.n_states).T, labels[:-1, :, :].reshape(-1, self.n_states)) # horizontal edges horz = np.dot(labels[:, 1:, :].reshape(-1, self.n_states).T, labels[:, :-1, :].reshape(-1, self.n_states)) pw = vert + horz pw = pw + pw.T - np.diag(np.diag(pw)) feature = np.hstack([unaries_acc, pw[np.tri(self.n_states, dtype=np.bool)]]) return feature
def _flat2D(fld, center='Atlantic'): """convert mds 2D data into global 2D field""" nx = fld.shape[1] ny = fld.shape[0] n = ny//nx//4 # eastern and western hemispheres eastern=np.concatenate((fld[:n*nx,:],fld[n*nx:2*(n*nx)]),axis=1) tmp = fld[2*(n*nx)+nx:, ::-1] western=np.concatenate((tmp[2::n,:].transpose(), tmp[1::n,:].transpose(), tmp[0::n,:].transpose())) # Arctic face is special arctic = fld[2*(n*nx):2*(n*nx)+nx,:] arctice = np.concatenate((np.triu(arctic[::-1,:nx//2].transpose()), np.zeros((nx//2,nx))),axis=1) # arcticw = np.concatenate((arctic[:,nx:nx//2-1:-1].transpose(), # np.zeros((nx//2,nx//2)), # arctic[nx:nx//2-1:-1,nx//2-1::-1]),axis=1) mskr = np.tri(nx//2)[::-1,:] arcticw = np.concatenate((arctic[0:nx//2,nx:nx//2-1:-1].transpose(), arctic[nx//2:nx,nx:nx//2-1:-1].transpose()*mskr, np.triu(arctic[nx:nx//2-1:-1,nx:nx//2-1:-1]), arctic[nx:nx//2-1:-1,nx//2-1::-1]*mskr),axis=1) # if center == 'Pacific': gfld = np.concatenate( ( np.concatenate((eastern,arctice)), np.concatenate((western,arcticw)) ), axis=1) else: gfld = np.concatenate( ( np.concatenate((western,arcticw)), np.concatenate((eastern,arctice)) ), axis=1) return gfld
def triang_decomp(c): """Return a lower triangular matrix B that B * B.T = C""" n = c.shape[0] b = np.tri(n, n) b[1:n, 0] = c[1:n, 0] for i in xrange(1, n): b[i, 1:i+1] = np.sqrt(1 - c[i, 0]**2) for i in xrange(2, n): for j in xrange(1, i): b1 = np.dot(b[j, 0:j], b[i, 0:j].T) b2 = np.dot(b[j, j], b[i, j]) cosinv = (c[i, j] - b1)/b2 if np.isfinite(cosinv): if cosinv > 1: b[i, j] = b[i, j] b[i, j+1:n+1] = 0 elif cosinv < -1: b[i, j] = -b[i, j] b[i, j+1:n+1] = 0 else: b[i, j] = b[i, j]*cosinv sinTheta = np.sqrt(1 - cosinv**2) for k in xrange(j+1, n): b[i, k] = b[i, k]*sinTheta return b
def printHeatMap(marginals, words, outFile): N = len(words) words_uni = [i.decode('UTF-8') for i in words] heatmap = np.zeros((N+1, N+1)) for chart in marginals: heatmap[chart[0], chart[1]] = math.log(marginals[chart]) fig, ax = plt.subplots() mask = np.tri(heatmap.shape[0], k=0) heatmap = np.ma.array(heatmap, mask=mask) cmap = plt.cm.get_cmap('RdBu') cmap.set_bad('w') im = ax.pcolor(heatmap, cmap=cmap, alpha=0.8) font = mpl.font_manager.FontProperties(fname='/usr0/home/avneesh/spectral-scfg/data/wqy-microhei.ttf') ax.grid(True) ax.set_ylim([0,N]) ax.invert_yaxis() ax.set_yticks(np.arange(heatmap.shape[1]-1)+0.5, minor=False) ax.set_yticklabels(words_uni, minor=False, fontproperties=font) ax.set_xticks(np.arange(heatmap.shape[0])+0.5, minor=True) ax.set_xticklabels(np.arange(heatmap.shape[0]), minor=True) ax.set_xticks([]) cbar = fig.colorbar(im, use_gridspec=True) cbar.set_label('ln(sum)') ax.set_xlabel('Span End') ax.xaxis.set_label_position('top') ax.xaxis.tick_top() plt.ylabel('Span starting at word: ') plt.tight_layout() #ax.set_title('CKY Heat Map: Node Marginals') fig.savefig(outFile)
def plot_corr(df, size=10): """Function plots a graphical correlation matrix for each pair of columns in the dataframe. Input: df: pandas DataFrame size: vertical and horizontal size of the plot""" import matplotlib.pyplot as plt from matplotlib import cm import numpy as np corr = df.corr() label = df.corr() mask = np.tri(corr.shape[0], k=-1) corr = np.ma.array(corr, mask=mask) mask[np.triu_indices_from(mask)] = True fig, ax = plt.subplots(figsize=(size, size)) ax.matshow(corr) cmap = cm.get_cmap("jet", 10) cmap.set_bad("w") plt.xticks(range(len(label.columns)), label.columns, rotation=90) plt.yticks(range(len(label.columns)), label.columns) ax.imshow(corr, interpolation="nearest", cmap=cmap) plt.show()
def __init__(self, metric, ndim=1, extra=[]): # Special case 1-D for speed. if ndim == 1: self.nextra = len(extra) super(RadialKernel, self).__init__(*(np.append(extra, metric)), ndim=1) else: inds = np.tri(ndim, dtype=bool) try: l = len(metric) except TypeError: pars = np.diag(float(metric) * np.ones(ndim))[inds] else: if l == ndim: pars = np.diag(metric)[inds] else: pars = np.array(metric) if l != (ndim*ndim + ndim) / 2: raise ValueError("Dimension mismatch") self.nextra = len(extra) super(RadialKernel, self).__init__(*(np.append(extra, pars)), ndim=ndim) # Build the gradient indicator masks. self.gm = np.empty(np.append(len(pars), self.matrix.shape), dtype=int) for i in range(len(pars)): ind = np.zeros(len(pars), dtype=int) ind[i] = 1 self.gm[i] = self._build_matrix(ind)
def compute_T_vectorized(transmissivity): # really vectorized version... to work with arbitrary dimensions # of input transmissivity # Assumption is the last dimension of transmissivity is vertical trans_shape = np.shape(transmissivity) N = trans_shape[-1] otherdims = trans_shape[:-1] ones = np.ones(otherdims)[..., np.newaxis] tau = np.concatenate((ones, transmissivity), axis=-1) tiletau = np.tile(tau[..., np.newaxis],N+1) # equivalent to taking transpose of last two axes #B = np.rollaxis(tiletau, -1, -2) B = tiletau matdims = np.append(np.array(otherdims),[1,1]) # dimensions of matrix should be [otherdims,N+1,N+1] tri = np.tile(np.tri(N+1).transpose(),matdims) # np.tril refuses to broadcast over other dims in numpy < 1.9 # use a custom version instead, below # Performance is BETTER with numpy 1.9 A = tril(B,k=-1) + tri #Tup = tril(np.cumprod(A, axis=-2)) ## transpose over last two axes #Tdown = np.rollaxis(Tup, -1, -2) Tdown = tril(np.cumprod(A, axis=-2)) # transpose over last two axes Tup = np.rollaxis(Tdown, -1, -2) return Tup, Tdown
def get_pairwise_potentials(self, x, w): """Computes pairwise potentials for x and w. Parameters ---------- x : tuple Instance Representation. w : ndarray, shape=(size_psi,) Weight vector for CRF instance. Returns ------- pairwise : ndarray, shape=(n_states, n_states) Pairwise weights. """ self._check_size_w(w) self._check_size_x(x) pairwise_flat = np.asarray(w[self.n_states * self.n_features:]) pairwise_params = np.zeros((self.n_states, self.n_states)) # set lower triangle of matrix, then make symmetric # we could try to redo this using ``scipy.spatial.distance`` somehow pairwise_params[np.tri(self.n_states, dtype=np.bool)] = pairwise_flat return (pairwise_params + pairwise_params.T - np.diag(np.diag(pairwise_params)))
def _gen_lists(labels): """Generate matched lists of row and column index labels. Shortcut function for generating matched lists of row and col index labels for the set of pairwise comparisons specified by the list of those indices recovered using ``np.nonzero(interaction)``. Reproduces values of iterated indices from the nested for-loops contained in ``get_dist`` function in original code from [1]_. Parameters ---------- labels : numpy.array array containing the indices of nonzero elements in one dimension of an interaction matrix Returns ------- k_labels : numpy.array index labels specifying row-wise member of pairwise interaction t_labels : numpy.array index labels specifying column-wise member of pairwise interaction References ---------- .. [1] Hommola K, Smith JE, Qiu Y, Gilks WR (2009) A Permutation Test of Host-Parasite Cospeciation. Molecular Biology and Evolution, 26, 1457-1468. """ i_array, j_array = np.transpose(np.tri(len(labels)-1)).nonzero() j_array = j_array + 1 return labels[i_array], labels[j_array]
def test_cho_factor(): for i in xrange(1, 11): A = rand_mat(i) A2 = A.copy() L1 = np.linalg.cholesky(A) L2 = np.empty_like(A, order='F') la.cho_factor(A, L2) L2 *= np.tri(i) assert np.allclose(L1, L2) assert (A == A2).all() la.cho_factor(A, A) A *= np.tri(i) assert np.allclose(L1, A) assert not (A == A2).all()
def compute_connectivity(functional): with np.errstate(invalid="ignore"): corr = np.nan_to_num(np.corrcoef(functional)) mask = np.invert(np.tri(corr.shape[0], k=-1, dtype=bool)) m = ma.masked_where(mask == 1, mask) return ma.masked_where(m, corr).compressed()
def triangular_columnwise_assign(self, A_qnn, A_Nn, band_rank): """Assign the sub-blocks pertaining from a given rank to the lower triangular part of a Hermitian matrix A_NN. This subroutine is used for matrix assembly. Parameters: A_qnn: ndarray Sub-blocks belonging to the specified rank. A_Nn: ndarray Full column vector in which to write contributions from sub-blocks. band_rank: int Communicator rank to which the sub-blocks belongs. Note that a Hermitian matrix requires Q=B//2+1 blocks of M x M elements where B is the communicator size and M=N//B for N bands. """ N = self.bd.mynbands B = self.bd.comm.size assert band_rank in xrange(B) if B == 1: # Only fill in the lower part mask = np.tri(N).astype(bool) A_Nn[mask] = A_qnn.reshape((N, N))[mask] return # A_qnn[q2,myn1,myn2] on rank q1 is the q2'th overlap calculated # between <psi_n1| and A|psit_n2> where n1 <-> (q1,myn1) and # n2 <-> ((q1+q2)%B,myn2) since we've sent/recieved q2 times. q1 = band_rank Q = B // 2 + 1 if debug: assert A_qnn.shape == (Q, N, N) # Note that for integer inequalities, these relations are useful (X>0): # A*X > B <=> A > B//X ^ A*X <= B <=> A <= B//X if self.bd.strided: raise NotImplementedError """ A_nbn = A_NN.reshape((N, B, N)) mask = np.empty((N,N), dtype=bool) for q2 in range(Q): # n1 = (q1+q2)%B + myn1*B ^ n2 = q1 + myn2*B # # We seek the lower triangular part i.e. n1 >= n2 # <=> (myn2-myn1)*B <= (q1+q2)%B-q1 # <=> myn2-myn1 <= dq//B dq = (q1+q2)%B-q1 # within ]-B; Q[ so dq//B is -1 or 0 # Create mask for lower part of current block mask[:] = np.tri(N, N, dq//B) if debug: m1,m2 = np.indices((N,N)) assert dq in xrange(-B+1,Q) assert (mask == (m1 >= m2 - dq//B)).all() # Copy lower part of A_qnn[q2] to its rightfull place A_nbn[:, (q1+q2)%B][mask] = A_qnn[q2][mask] # Negate the transposed mask to get complementary mask mask = ~mask.T # Copy upper part of Hermitian conjugate of A_qnn[q2] A_nbn[:, q1][mask] = A_qnn[q2].T.conj()[mask] #XXX on rank (q1+q2)%B """ else: A_bnn = A_Nn.reshape((B, N, N)) for q2 in range(Q): # n1 = ((q1+q2)%B)*N + myn1 ^ n2 = q1*N + myn2 # # We seek the lower triangular part i.e. n1 >= n2 # <=> ((q1+q2)%B-q1)*N >= myn2-myn1 # <=> myn2-myn1 <= dq*N # <=> entire block if dq > 0, # ... myn2 <= myn1 if dq == 0, # ... copy nothing if dq < 0 if q1 + q2 < B: A_bnn[q1 + q2] = A_qnn[q2] reqs = [] if q1 < Q - 1: # receive from ranks >= Q if debug: Q2 = np.arange(Q, B - q1) print 'q1=%d, q2: %12s | recv from q1+q2:%12s -> A_bnn%s' % ( q1, Q2.tolist(), (q1 + Q2).tolist(), (q1 + Q2).tolist()) for q2 in range(Q, B - q1): rrank = q1 + q2 A_nn = A_bnn[q1 + q2] reqs.append(self.bd.comm.receive(A_nn, rrank, block=False)) elif q1 >= Q: # send to ranks < Q-1 if debug: Q2 = np.arange(B - q1, B - Q + 1)[::-1] print 'q1=%d, q2: %12s | send to q1+q2-B:%12s <- A_qnn%s.T.conj()' % ( q1, Q2.tolist(), (q1 + Q2 - B).tolist(), Q2.tolist()) for q2 in reversed(range(B - q1, B - Q + 1)): # symmetrize comm. srank = q1 + q2 - B sbuf_nn = np.conjugate(A_qnn[q2].T) # always a copy! reqs.append(self.bd.comm.send(sbuf_nn, srank, block=False)) else: if debug: print 'q1=%d, do nothing...' % q1 self.bd.comm.waitall(reqs)
def triangular_blockwise_assign(self, A_qnn, A_NN, band_rank): """Assign the sub-blocks pertaining from a given rank to the lower triangular part of a Hermitian matrix A_NN. This subroutine is used for matrix assembly. Parameters: A_qnn: ndarray Sub-blocks belonging to the specified rank. A_NN: ndarray Full matrix in which to write contributions from sub-blocks. band_rank: int Communicator rank to which the sub-blocks belongs. Note that a Hermitian matrix requires Q=B//2+1 blocks of M x M elements where B is the communicator size and M=N//B for N bands. """ N = self.bd.mynbands B = self.bd.comm.size assert band_rank in xrange(B) if B == 1: # Only fill in the lower part mask = np.tri(N).astype(bool) A_NN[mask] = A_qnn.reshape((N, N))[mask] return # A_qnn[q2,myn1,myn2] on rank q1 is the q2'th overlap calculated # between <psi_n1| and A|psit_n2> where n1 <-> (q1,myn1) and # n2 <-> ((q1+q2)%B,myn2) since we've sent/recieved q2 times. q1 = band_rank Q = B // 2 + 1 if debug: assert A_qnn.shape == (Q, N, N) # Note that for integer inequalities, these relations are useful (X>0): # A*X > B <=> A > B//X ^ A*X <= B <=> A <= B//X if self.bd.strided: A_nbnb = A_NN.reshape((N, B, N, B)) mask = np.empty((N, N), dtype=bool) for q2 in range(Q): # n1 = (q1+q2)%B + myn1*B ^ n2 = q1 + myn2*B # # We seek the lower triangular part i.e. n1 >= n2 # <=> (myn2-myn1)*B <= (q1+q2)%B-q1 # <=> myn2-myn1 <= dq//B dq = (q1 + q2) % B - q1 # within ]-B; Q[ so dq//B is -1 or 0 # Create mask for lower part of current block mask[:] = np.tri(N, N, dq // B) if debug: m1, m2 = np.indices((N, N)) assert dq in xrange(-B + 1, Q) assert (mask == (m1 >= m2 - dq // B)).all() # Copy lower part of A_qnn[q2] to its rightfull place A_nbnb[:, (q1 + q2) % B, :, q1][mask] = A_qnn[q2][mask] # Negate the transposed mask to get complementary mask mask = ~mask.T # Copy upper part of Hermitian conjugate of A_qnn[q2] A_nbnb[:, q1, :, (q1 + q2) % B][mask] = A_qnn[q2].T.conj()[mask] else: A_bnbn = A_NN.reshape((B, N, B, N)) # Optimization for the first block if q1 == 0: A_bnbn[:Q, :, 0] = A_qnn return for q2 in range(Q): # n1 = ((q1+q2)%B)*N + myn1 ^ n2 = q1*N + myn2 # # We seek the lower triangular part i.e. n1 >= n2 # <=> ((q1+q2)%B-q1)*N >= myn2-myn1 # <=> myn2-myn1 <= dq*N # <=> entire block if dq > 0, # ... myn2 <= myn1 if dq == 0, # ... copy nothing if dq < 0 if q1 + q2 < B: A_bnbn[q1 + q2, :, q1] = A_qnn[q2] else: A_bnbn[q1, :, q1 + q2 - B] = A_qnn[q2].T.conj()
def _find_diag_blocks(a): """Find diagonal blocks of in a boolean matrix Find all square blocks of value `True` on the diagonal of a symmetric array. Parameters ---------- a : numpy.ndarray Boolean array, which has to be symmetric. Returns ------- start, end : numpy.ndarray 1D arrays containing start and end row numbers for each block Examples -------- >>> a = numpy.array([[1, 1, 0], [1, 1, 1], [0, 1, 1]]) >>> _find_diag_blocks(a) (array([0, 1]), array([1, 2])) """ a = a.copy() # Set lower triangle to True so that only entries in upper triangle # are found when searching for False a[np.tri(*a.shape, -1, dtype=bool)] = True # Find first False entry in every row. If (and only if) there is none, # min_col will be 0 for that column, thus set it to a.shape[1] min_col = np.argmin(a, axis=1) min_col[min_col == 0] = a.shape[1] # If the difference of two consecutive indices is larger than 0, a new # block starts # e. g. for [[1, 1, 0], [1, 1, 1], [0, 1, 1]], min_col is [2, 3, 3], # the diff is [1, 0] while True: col_diff = np.diff(min_col) # if diff is < 0 somewhere, this may lead to false positives for the # preceding rows. E. g. if diff is -3 here and was 4 for the previous # row, the "net diff" is 1. neg_idx = np.nonzero(col_diff < 0)[0] if not len(neg_idx): break # overwrite the preceding value so that the diff is 0 and retry # this is fine since we are only interested in positive diffs below min_col[neg_idx] = min_col[neg_idx + 1] is_start = np.hstack(([True], col_diff > 0)) # first row is always start # To determine where blocks end, one has to basically do the same as for # starts, only with rows in reversed order and look for diff < 0 min_row = np.argmin(a[::-1, :], axis=0) min_row[min_row == 0] = a.shape[0] while True: row_diff = np.diff(min_row) pos_idx = np.nonzero(row_diff > 0)[0] if not len(pos_idx): break min_row[pos_idx + 1] = min_row[pos_idx] is_end = np.hstack((row_diff < 0, [True])) return is_start.nonzero()[0], is_end.nonzero()[0]
def fit_n_layer_profile(depths, crr_n15s, n=3, crr_n15_opts=None, cap=0.6, max_depth=20): """ Determines the best n-layer profile fit for a given parameter Method: 1) Computes the cumulative absolute difference between the upper bound and the actual values 2) Cycles through a range of guess values (between upper and lower bounds), computes the cumulative absolute difference between a guessed value and the actual values 3) For each guess it determines the change points 4) For each guess, Evaluates which change points pairs are best (either 1 or 2 depending on n). 5) For each guess, for each change point pair, compute mean of values between pair (least error) 6) For each guess, compute the total error as the sum of the error from all of the non-fitted and fitted layers 7) Select guess which has the least error Parameters ---------- depths: array Distance from surface crr_n15s: array Actual values to fit to n: int (3 or 5) Number of layers to fit crr_n15_opts: array_like Possibly options for CRR_n15 max_depth: float Maximum distance to consider in fitting cap: float Clips all actual values to this maximum prior to fitting Returns ------- array_like: depths to top of liq layers array_like: depths to top of non-liq layers array_like: fitted values in liq layers float: normalised error """ if not (n == 3 or n == 5): raise ValueError("n must be either 3 or 5") n_liqs = int((n - 1) / 2) # prepare search array if crr_n15_opts is None: if n_liqs == 1: crr_n15_opts = np.array([0.6, 0.5, 0.2, 0.06]) # q_c1n_cs = np.arange(0, 180., 5.) # crr_n15_opts = bi_functions.calc_crr_m7p5_from_qc1ncs(q_c1n_cs)[::-1] else: q_c1n_cs = np.arange(0, 180., 5.) crr_n15_opts = bi_functions.calc_crr_m7p5_from_qc1ncs( q_c1n_cs)[::-1] # standardise depth if depths[-1] > max_depth: indy = np.where(depths > max_depth)[0][0] else: indy = len(depths) std_depths = depths[:indy] std_crr_n15s = crr_n15s[:indy] n_depths = len(std_depths) # enforce cap std_crr_n15s = np.clip(std_crr_n15s, None, cap) # compute difference between options and actual values init_diffs = std_crr_n15s[:, np.newaxis] - crr_n15_opts[np.newaxis, :] init_cdiffs = np.cumsum(np.abs(init_diffs), axis=0) # prepare output arrays normed_diffs = [] d_liqs = [[] for i in range(n_liqs)] d_nonliqs = [[] for i in range(n_liqs)] crrs = [[] for i in range(n_liqs)] # evaluate each option for ii in range(len(crr_n15_opts)): opts_lay = [] eline = init_cdiffs[:, 0] - init_cdiffs[:, ii] peak_ids = eqsig.get_peak_array_indices(eline) + 1 if len(peak_ids) > 4 or (len(peak_ids) > 2 and n_liqs == 1): # continue if eline[peak_ids[1]] > 0: peak_ids = peak_ids[:-1] else: peak_ids = peak_ids[1:-1] poss_i_tops = np.take(peak_ids, np.arange(0, len(peak_ids), 2)) poss_i_bots = np.take(peak_ids, np.arange(1, len(peak_ids), 2)) poss_i_tops = np.insert(poss_i_tops, len(poss_i_tops), len(std_crr_n15s) - 1) # add end poss_i_bots = np.insert(poss_i_bots, len(poss_i_bots), len(std_crr_n15s) - 1) poss_err_tops = np.take(eline, poss_i_tops) poss_err_bots = np.take(eline, poss_i_bots) for ll in range(n_liqs): if len(poss_i_tops) < n_liqs or len(poss_i_bots) < n_liqs: opts_lay.append([0]) else: # error occurred for each change point pair opts_lay.append(poss_err_tops[np.newaxis, ll:] - poss_err_bots[ll:, np.newaxis]) # remove cases where i_bot is higher than i_top opts_lay[ll] *= np.tri(*opts_lay[ll].shape) if n_liqs == 1: opts_l1f = opts_lay[0] j, i = np.unravel_index(opts_l1f.argmin(), opts_l1f.shape) i_tops = [int(poss_i_tops[i])] i_bots = [int(poss_i_bots[j])] elif n_liqs == 2: opts_l1f = 
opts_lay[0].flatten() top1_is = poss_i_tops[np.newaxis, :] * np.ones_like( opts_lay[0], dtype=int) bot1_is = poss_i_bots[:, np.newaxis] * np.ones_like( opts_lay[0], dtype=int) opts_l2f = opts_lay[1].flatten() top2_is = poss_i_tops[np.newaxis, 1:] * np.ones_like( opts_lay[1], dtype=int) bot2_is = poss_i_bots[1:, np.newaxis] * np.ones_like( opts_lay[1], dtype=int) opts = opts_l1f[np.newaxis, :] + opts_l2f[:, np.newaxis] top1_is = top1_is.flatten()[np.newaxis, :] * np.ones_like( opts, dtype=int) bot1_is = bot1_is.flatten()[np.newaxis, :] * np.ones_like( opts, dtype=int) top2_is = top2_is.flatten()[:, np.newaxis] * np.ones_like( opts, dtype=int) bot2_is = bot2_is.flatten()[:, np.newaxis] * np.ones_like( opts, dtype=int) # remove cases where i_crust_l2 < i_liq_l1 opts = np.where(top2_is < bot1_is, 1e10, opts) # remove cases where i_crust_l2 > i_liq_l2 opts = np.where(top2_is > bot2_is, 1e10, opts) opts = np.where(top2_is == len(std_depths) - 1, 1e10, opts) lay1_i, lay2_i = np.unravel_index(opts.argmin(), opts.shape) i_tops = [ int(top1_is[lay1_i][lay2_i]), int(top2_is[lay1_i][lay2_i]) ] i_bots = [ int(bot1_is[lay1_i][lay2_i]), int(bot2_is[lay1_i][lay2_i]) ] else: raise ValueError("n_liqs must be either 1 or 2") # total_err = init_cdiffs[i_tops[0]][0] crr_profile = np.ones_like(std_crr_n15s) * cap for ll in range(n_liqs): d_liqs[ll].append(depths[i_tops[ll]]) d_nonliqs[ll].append(depths[i_bots[ll]]) if i_tops[ll] == i_bots[ll]: crrs[ll].append(cap) # err = 0 else: # Median equal to min of absolute deviation crr_median = np.median(std_crr_n15s[i_tops[ll]:i_bots[ll]]) crrs[ll].append(crr_median) crr_profile[i_tops[ll]:i_bots[ll]] = crr_median # err = np.sum(np.abs(capped_values[i_tops[ll]:i_bots[ll]] - crrs[ll][ii])) # total_err += err total_err = np.sum(abs(std_crr_n15s - crr_profile)) normed_diffs.append(total_err / (n_depths * cap)) else: if len(peak_ids) <= 2: for ll in range(n_liqs): d_liqs[ll].append(0) d_nonliqs[ll].append(depths[-1]) crrs[ll].append(cap) normed_diffs.append(1e6) else: d_liqs[0].append(depths[peak_ids[1]]) d_nonliqs[0].append(depths[peak_ids[2]]) crr_median = np.median(std_crr_n15s[peak_ids[1]:peak_ids[2]]) crrs[0].append(crr_median) d_liqs[1].append(depths[-1]) d_nonliqs[1].append(depths[-1]) crrs[1].append(cap) crr_profile = np.ones_like(std_crr_n15s) * cap crr_profile[peak_ids[1]:peak_ids[2]] = crr_median total_err = np.sum(abs(std_crr_n15s - crr_profile)) normed_diffs.append(total_err / (n_depths * cap)) normed_diffs = np.array(normed_diffs) d_liqs = np.array(d_liqs) d_nonliqs = np.array(d_nonliqs) crrs = np.array(crrs) i_best = np.argmin(normed_diffs) normed_diff = normed_diffs[i_best] # change to ECP logic of depths to top of layers d_liqs = d_liqs[:, i_best] d_nonliqs = d_nonliqs[:, i_best] return d_liqs, d_nonliqs, crrs[:, i_best], normed_diff
def plot_matrix(mat, title=None, labels=None, figure=None, axes=None, colorbar=True, cmap=plt.cm.RdBu_r, tri='full', auto_fit=True, grid=False, reorder=False, **kwargs): """Plot the given matrix. Parameters ---------- mat : 2-D numpy array Matrix to be plotted. title : string or None, optional A text to add in the upper left corner. labels : list, ndarray of strings, empty list, False, or None, optional The label of each row and column. Needs to be the same length as rows/columns of mat. If False, None, or an empty list, no labels are plotted. figure : figure instance, figsize tuple, or None, optional Sets the figure used. This argument can be either an existing figure, or a pair (width, height) that gives the size of a newly-created figure. Specifying both axes and figure is not allowed. axes : None or Axes, optional Axes instance to be plotted on. Creates a new one if None. Specifying both axes and figure is not allowed. colorbar : boolean, optional If True, an integrated colorbar is added. Default=True. cmap : matplotlib colormap, optional The colormap for the matrix. Default=plt.cm.RdBu_r. tri : {'full', 'lower', 'diag'}, optional Which triangular part of the matrix to plot: 'lower' is the lower part, 'diag' is the lower including diagonal, and 'full' is the full matrix. Default='full'. auto_fit : boolean, optional If auto_fit is True, the axes are dimensioned to give room for the labels. This assumes that the labels are resting against the bottom and left edges of the figure. Default=True. grid : color or False, optional If not False, a grid is plotted to separate rows and columns using the given color. Default=False. reorder : boolean or {'single', 'complete', 'average'}, optional If not False, reorders the matrix into blocks of clusters. Accepted linkage options for the clustering are 'single', 'complete', and 'average'. True defaults to average linkage. Default=False. .. note:: This option is only available with SciPy >= 1.0.0. .. versionadded:: 0.4.1 kwargs : extra keyword arguments, optional Extra keyword arguments are sent to pylab.imshow. Returns ------- display : instance of matplotlib Axes image. 
""" # we need a list so an empty one will be cast to False if isinstance(labels, np.ndarray): labels = labels.tolist() if labels and len(labels) != mat.shape[0]: raise ValueError("Length of labels unequal to length of matrix.") if reorder: if not labels: raise ValueError("Labels are needed to show the reordering.") try: from scipy.cluster.hierarchy import (linkage, optimal_leaf_ordering, leaves_list) except ImportError: raise ImportError("A scipy version of at least 1.0 is needed " "for ordering the matrix with " "optimal_leaf_ordering.") valid_reorder_args = [True, 'single', 'complete', 'average'] if reorder not in valid_reorder_args: raise ValueError("Parameter reorder needs to be " "one of {}.".format(valid_reorder_args)) if reorder is True: reorder = 'average' linkage_matrix = linkage(mat, method=reorder) ordered_linkage = optimal_leaf_ordering(linkage_matrix, mat) index = leaves_list(ordered_linkage) # make sure labels is an ndarray and copy it labels = np.array(labels).copy() mat = mat.copy() # and reorder labels and matrix labels = labels[index].tolist() mat = mat[index, :][:, index] if tri == 'lower': mask = np.tri(mat.shape[0], k=-1, dtype=bool) ^ True mat = np.ma.masked_array(mat, mask) elif tri == 'diag': mask = np.tri(mat.shape[0], dtype=bool) ^ True mat = np.ma.masked_array(mat, mask) if axes is not None and figure is not None: raise ValueError("Parameters figure and axes cannot be specified " "together. You gave 'figure=%s, axes=%s'" % (figure, axes)) if figure is not None: if isinstance(figure, plt.Figure): fig = figure else: fig = plt.figure(figsize=figure) axes = plt.gca() own_fig = True else: if axes is None: fig, axes = plt.subplots(1, 1, figsize=(7, 5)) own_fig = True else: fig = axes.figure own_fig = False display = axes.imshow(mat, aspect='equal', interpolation='nearest', cmap=cmap, **kwargs) axes.set_autoscale_on(False) ymin, ymax = axes.get_ylim() if not labels: axes.xaxis.set_major_formatter(plt.NullFormatter()) axes.yaxis.set_major_formatter(plt.NullFormatter()) else: axes.set_xticks(np.arange(len(labels))) axes.set_xticklabels(labels, size='x-small') for label in axes.get_xticklabels(): label.set_ha('right') label.set_rotation(50) axes.set_yticks(np.arange(len(labels))) axes.set_yticklabels(labels, size='x-small') for label in axes.get_yticklabels(): label.set_ha('right') label.set_va('top') label.set_rotation(10) if grid is not False: size = len(mat) # Different grids for different layouts if tri == 'lower': for i in range(size): # Correct for weird mis-sizing i = 1.001 * i axes.plot([i + 0.5, i + 0.5], [size - 0.5, i + 0.5], color='grey') axes.plot([i + 0.5, -0.5], [i + 0.5, i + 0.5], color='grey') elif tri == 'diag': for i in range(size): # Correct for weird mis-sizing i = 1.001 * i axes.plot([i + 0.5, i + 0.5], [size - 0.5, i - 0.5], color='grey') axes.plot([i + 0.5, -0.5], [i - 0.5, i - 0.5], color='grey') else: for i in range(size): # Correct for weird mis-sizing i = 1.001 * i axes.plot([i + 0.5, i + 0.5], [size - 0.5, -0.5], color='grey') axes.plot([size - 0.5, -0.5], [i + 0.5, i + 0.5], color='grey') axes.set_ylim(ymin, ymax) if auto_fit: if labels: fit_axes(axes) elif own_fig: plt.tight_layout(pad=.1, rect=((0, 0, .95, 1) if colorbar else (0, 0, 1, 1))) if colorbar: cax, kw = make_axes(axes, location='right', fraction=0.05, shrink=0.8, pad=.0) fig.colorbar(mappable=display, cax=cax) # make some room fig.subplots_adjust(right=0.8) # change current axis back to matrix plt.sca(axes) if title is not None: # Adjust the size text_len = np.max([len(t) for t in 
title.split('\n')]) size = axes.bbox.size[0] / text_len axes.text(0.95, 0.95, title, horizontalalignment='right', verticalalignment='top', transform=axes.transAxes, size=size) return display
def tri(N, M=None, k=0): return np.tri(N, M, k)
cov_test = np.zeros([n_test, M, M]) # calculate for all digits/direction for i_train in range(n_train): I = get_input(train_data, i_train, n_train) mean_train[i_train, :] = I.sum(axis=1) cov_train[i_train, :, :] = np.tensordot(I[:, 1:], I[:, :-1], axes=(1, 1)) / (T - 1) for i_test in range(n_test): I = get_input(test_data, i_test, n_test) mean_test[i_test, :] = I.sum(axis=1) cov_test[i_test, :, :] = np.tensordot(I[:, 1:], I[:, :-1], axes=(1, 1)) / (T - 1) mask_tri = np.tri(M, M, 0, dtype=np.bool) cov_train = cov_train[:, mask_tri] cov_test = cov_test[:, mask_tri] #%% # classification i_n_pat = 0 n_rep = 10 # mean, cov, cov with m cov entries (randomly chosen) instead of m(m-1)/2 perf = np.zeros([n_rep, 3]) CM = np.zeros([n_rep, 3, n_cat, n_cat]) c_MLR = skppl.make_pipeline(
def inference_svd(user_batch, item_batch, wins_batch, fails_batch, user_num, item_num, dim=5, device="/cpu:0"): with tf.device("/cpu:0"): bias_global = tf.get_variable("bias_global", shape=[]) user_bias = tf.get_variable( "user_bias", shape=[user_num], initializer=tf.truncated_normal_initializer(stddev=1)) item_bias = tf.get_variable( "item_bias", shape=[item_num], initializer=tf.truncated_normal_initializer(stddev=1)) bias_users = tf.nn.embedding_lookup(user_bias, user_batch, name="bias_users") bias_items = tf.nn.embedding_lookup(item_bias, item_batch, name="bias_items") # For PFA-like # item_wins = tf.get_variable("item_wins", shape=[item_num], # initializer=tf.truncated_normal_initializer(stddev=1)) # item_fails = tf.get_variable("item_fails", shape=[item_num], # initializer=tf.truncated_normal_initializer(stddev=1)) # wins_items = tf.nn.embedding_lookup(item_wins, item_batch, name="wins_items") # fails_items = tf.nn.embedding_lookup(item_fails, item_batch, name="fails_items") # For ordinal regression # thresholds = tf.get_variable("thresholds", shape=[item_num, NB_CLASSES - 1], # initializer=tf.truncated_normal_initializer(stddev=1)) # threshold_items = tf.nn.embedding_lookup(thresholds, item_batch, name="thre_items") user_features = tf.get_variable( "user_features", shape=[user_num, dim], initializer=tf.truncated_normal_initializer(stddev=0.02)) item_features = tf.get_variable( "item_features", shape=[item_num, dim], initializer=tf.truncated_normal_initializer(stddev=0.02)) # item_wins_features = tf.get_variable("item_wins_features", shape=[item_num, dim], # initializer=tf.truncated_normal_initializer(stddev=0.02)) # item_fails_features = tf.get_variable("item_fails_features", shape=[item_num, dim], # initializer=tf.truncated_normal_initializer(stddev=0.02)) feat_users = tf.nn.embedding_lookup(user_features, user_batch, name="feat_users") feat_items = tf.nn.embedding_lookup(item_features, item_batch, name="feat_items") # For PFA-like # wins_feat_items = tf.nn.embedding_lookup(item_wins_features, item_batch, name="wins_feat_items") # fails_feat_items = tf.nn.embedding_lookup(item_fails_features, item_batch, name="fails_feat_items") with tf.device(device): logits = tf.reduce_sum(tf.multiply(feat_users, tf.abs(feat_items)), 1) logits = tf.add(logits, bias_global) logits = tf.add(logits, bias_users) logits = tf.add(logits, bias_items, name='logits') # For PFA-like # logits = tf.add(logits, wins_batch * wins_items) # logits = tf.add(logits, fails_batch * fails_items) # bonus_wins = tf.reduce_sum(tf.multiply(wins_feat_items, feat_items), 1) # bonus_fails = tf.reduce_sum(tf.multiply(fails_feat_items, feat_items), 1) # logits = tf.add(logits, wins_batch * bonus_wins) # logits = tf.add(logits, fails_batch * bonus_fails, name="svd_inference") if NB_CLASSES > 2: cumulative_op = tf.constant(np.tri(NB_CLASSES - 1).T, dtype=tf.float32) pos_threshold_items = tf.matmul( tf.exp(threshold_items), cumulative_op) #- bias_items[:, None] logits_cdf = logits[:, None] - pos_threshold_items # Computing pdf for ordinal regression (needed to get the inferred label) cdf = tf.sigmoid(logits_cdf) pdf2cdf_A = tf.constant( np.fromfunction(lambda i, j: (j == i + 1) - 1. * (j == i), (NB_CLASSES - 1, NB_CLASSES), dtype=float), dtype=tf.float32) pdf2cdf_b = tf.constant(np.fromfunction(lambda i, j: 1. 
* (j == 0), (1, NB_CLASSES), dtype=float), dtype=tf.float32) pdf = tf.matmul(cdf, pdf2cdf_A) + pdf2cdf_b #logits_pdf = tf.log(pdf / (1 - pdf)) #test = -logits_cdf + tf.abs(threshold_items) h = tf.exp(threshold_items[:, 1:]) a = logits_cdf[:, :-1] logits_pdf = tf.concat( ( -logits_cdf[:, 0][:, None], # h - tf.log(tf.exp(a) + tf.exp(-a + h) + 2), tf.log(1 - tf.exp(-h)) - tf.abs(a) - tf.log( tf.exp(-h + tf.minimum(2 * a, 0)) + tf.exp(tf.minimum(-2 * a, 0)) + 2 * tf.exp(-h - tf.abs(a))), logits_cdf[:, -1][:, None]), 1) infer = tf.argmax(pdf, axis=1) else: pdf = tf.sigmoid(logits) infer = tf.round(pdf) # Regularization l2_user = tf.nn.l2_loss(feat_users) l1_user = tf.reduce_sum(tf.abs(feat_users)) l2_item = tf.nn.l2_loss(feat_items) l1_item = tf.reduce_sum(tf.abs(feat_items)) l2_bias_user = tf.nn.l2_loss(bias_users) l2_bias_item = tf.nn.l2_loss(bias_items) regularizer = tf.add(l2_user, l2_item) regularizer = tf.add(regularizer, l2_bias_user) regularizer = tf.add(regularizer, l2_bias_item, name="regularizer") # return infer, logits, logits_cdf, logits_pdf, regularizer, user_bias, user_features, item_bias, item_features, thresholds return infer, logits, regularizer, user_bias, user_features, item_bias, item_features
def tri(n,mode='exact'): x = np.array(1*(np.tri(n)>0),dtype=dtypes[mode]) x = normalize(x) return x
def fill_blank(self, mask): idx = tf.argmax(mask, axis=1) fill_mat = tf.convert_to_tensor( np.tri(self.max_sentence_len, k=-1).astype('float32')) fill = tf.gather(fill_mat, idx) return mask + fill
def forward(self, x, labels, **kwargs): """ Args: x: feature matrix with shape (batch_size, feat_dim). labels: ground truth labels with shape (num_classes). """ batch_size = x.size(0) ncenters, nfeas = self.centers.size() distmat_x2cent = calc_distmat2(x, self.centers) classes = torch.arange(self.num_classes).long() if self.use_gpu: classes = classes.cuda() classes = Variable(classes) labels = labels.unsqueeze(1).expand(batch_size, self.num_classes) mask = labels.eq(classes.expand(batch_size, self.num_classes)) dists_dcl = [] dists_pull = [] if not self.mode: _zero = torch.zeros(1).cuda() return _zero, _zero, _zero, _zero modes = self.mode.split('.') for i in range(batch_size): dist_pull = distmat_x2cent[i][mask[i]] dists_pull.append(dist_pull) if 'dcl' in modes: if 'min' in modes: dist_push = distmat_x2cent[i][1 - mask[i]].min() * self.push_wei else: dist_push = ( distmat_x2cent[i][1 - mask[i]] * self.push_wei).mean() if 'with1' in modes: dists_dcl.append(dist_pull / (dist_push + 1)) else: dists_dcl.append(dist_pull / dist_push) if 'margin' in modes: dists_dcl.append( (torch.max(torch.zeros(1).cuda(), dist_pull / distmat_x2cent[i][1 - mask[i]] - 1 + self.margin2 ) * self.push_wei).mean() ) if 'exp' in modes: # choose only most largest k value in negative logits = -distmat_x2cent[i] if self.args.topk != -1: neg_topk, _ = torch.topk(logits[1 - mask[i]], k=self.args.topk, largest=True) pos = logits[mask[i]] logits = torch.cat([pos, neg_topk]) shift_logits = logits - torch.max(logits) Z = torch.exp(shift_logits).sum() dist_now = shift_logits - torch.log(Z) # dist_now = F.log_softmax(logits, dim=0) dists_dcl.append(-dist_now[0]) # dists_dcl # if 'nopos' in modes: # dists_dcl.append( # -torch.exp(-dist_pull) / torch.exp(-distmat_x2cent[i][1 - mask[i]]).sum() # ) # else: # dists_dcl.append( # - torch.exp(-dist_pull) / torch.exp(-distmat_x2cent[i]).sum() # ) loss_pull = torch.cat(dists_pull).mean() if 'cent' in modes: loss = loss_pull elif 'ccent' in modes: if dists_dcl[0].shape == (): dists_dcl = torch.stack(dists_dcl) else: dists_dcl = torch.cat(dists_dcl) loss = dists_dcl.mean() else: loss = torch.zeros(1).cuda() distmat_cent2cent = calc_distmat2(self.centers, self.centers) # if 'disall' in modes: mask = to_torch(np.tri(ncenters, dtype=np.uint8) - np.identity(ncenters, dtype=np.uint8)).cuda() cent_pairs = distmat_cent2cent[mask] loss_dis = -cent_pairs.mean() # else: # mask = to_torch(np.identity(ncenters, dtype=np.float32)).cuda() * distmat_cent2cent.max() # loss_dis = (distmat_cent2cent + mask).min(dim=1) # loss_dis = -loss_dis.mean() return loss, loss_dis, distmat_cent2cent, loss_pull
import numpy as np import pandas as pd n = 4 #int(input('Size n of matrix: n= ')) A_any = np.random.randn(n, n) # Generate a random (n x n) matrix U_any = A_any * np.tri(n).T # Triangulating matrix x_true = np.random.randn(n) b_any = np.dot(U_any, x_true) def backU(U, b, n): '''Takes inn triangular matrix U, vector b and dimention of matrix n computes x from matrix equation Ax=b troughout nested backsubstitution''' x_computed = np.zeros(n) for i in range(n - 1, -1, -1): # itererer over matrisen vertikalt x_tmp = b[i] # henter ut siste kjente x for j in range( n - 1, i, -1 ): # iterer over kollonene for neste x gitt x_temp = kollonens b x_tmp = x_tmp - x_computed[j] * U[i, j] # beregner neste x x_computed[i] = x_tmp / U[i, i] return x_computed x_numpy = lambda U, b: np.linalg.solve(U, b) # numpy sin innebygde solver
def refresh(curr, new_data_and_pairs, log=True): new_data = new_data_and_pairs[0] if not '.' in str(step): rounding_to = 0 else: rounding_to = len(str(step).split('.')[1]) if len(new_data.shape) > 1: tri = new_data * np.tri(*new_data.shape) max_value = np.max(new_data) trash_symbol = round(max_value + step, rounding_to) tri[tri == 0] = trash_symbol groups_numbers = np.floor(np.true_divide(tri, step)).astype(int) trash_group_number = np.floor(np.true_divide(trash_symbol, step)).astype(int) unique_groups_numbers, counts = np.unique(groups_numbers, return_counts=True) trash_group_index = np.where(unique_groups_numbers == trash_group_number)[0][0] # print('TRASH GROUP INDEX', trash_group_index) unique_groups_numbers = np.delete(unique_groups_numbers, [trash_group_index]) counts = np.delete(counts, [trash_group_index]) else: groups_numbers = np.floor(np.true_divide(new_data, step)).astype(int) unique_groups_numbers, counts = np.unique(groups_numbers, return_counts=True) iterator = zip(unique_groups_numbers, counts) iterator = tqdm(iterator, desc='refreshing distribution', total=counts.shape[0]) if log else iterator for gn, count in iterator: if not (gn in curr): curr[gn] = { 'from': round(gn * step, rounding_to), 'to': round((gn + 1) * step, rounding_to), 'number': 0, 'examples': [] } curr[gn]['number'] += count if len(new_data_and_pairs) > 1: pairs = new_data_and_pairs[1] limit = new_data_and_pairs[2] ### stupid version # examples_already_found_for_group_number = {g: (len(curr[g]['examples']) == limit) for g in curr} # if not (False in examples_already_found_for_group_number.values()): # return curr # for I in tqdm(np.ndindex(groups_numbers.shape), desc='searching for examples', total=groups_numbers.size): # group_number = groups_numbers[I] # if group_number in curr: # if len(curr[group_number]['examples']) < limit: # example = [pairs[i] for i in I] # curr[group_number]['examples'].append(example) # if len(curr[group_number]['examples']) == limit: # examples_already_found_for_group_number[group_number] = True # if not (False in examples_already_found_for_group_number.values()): # return curr ### ### smart version curr_iterator = tqdm(curr, desc='searching for examples') if log else curr for group_number in curr_iterator: examples_left = limit - len(curr[group_number]['examples']) if examples_left != 0: indexes = np.vstack(np.where(groups_numbers == group_number)).T # objects_counts = {} # for e in indexes: # for i in e: # if not (i in objects_counts): # objects_counts[i] = 0 # objects_counts[i] += 1 # print('MAX IS', max(objects_counts.values()), 'from', indexes.size) attempt = 0 while True: attempt += 1 # print('attempt', attempt) if indexes.shape[0] > 100 * examples_left: # print('CHOICE') required_indexes = indexes[np.random.choice(indexes.shape[0], examples_left, replace=False), :].tolist() else: # print('SHUFFLE') np.random.shuffle(indexes) required_indexes = indexes[:examples_left].tolist() new_examples = [[pairs[i] for i in I] for I in required_indexes] # objects_counts = {} # for e in new_examples: # for i in e: # if not (i in objects_counts): # objects_counts[i] = 0 # objects_counts[i] += 1 # if (max(objects_counts.values()) <= len(objects_counts) // 4) or (attempt == 10): # break break curr[group_number]['examples'] += [[pairs[i] for i in I] for I in required_indexes] ### return curr
def _smacof_single(similarities, metric=True, n_components=2, init=None, max_iter=300, verbose=0, eps=1e-3, random_state=None): """ Computes multidimensional scaling using SMACOF algorithm Parameters ---------- similarities: symmetric ndarray, shape [n * n] similarities between the points metric: boolean, optional, default: True compute metric or nonmetric SMACOF algorithm n_components: int, optional, default: 2 number of dimension in which to immerse the similarities overwritten if initial array is provided. init: {None or ndarray}, optional if None, randomly chooses the initial configuration if ndarray, initialize the SMACOF algorithm with this array max_iter: int, optional, default: 300 Maximum number of iterations of the SMACOF algorithm for a single run verbose: int, optional, default: 0 level of verbosity eps: float, optional, default: 1e-6 relative tolerance w.r.t stress to declare converge random_state: integer or numpy.RandomState, optional The generator used to initialize the centers. If an integer is given, it fixes the seed. Defaults to the global numpy random number generator. Returns ------- X: ndarray (n_samples, n_components), float coordinates of the n_samples points in a n_components-space stress_: float The final value of the stress (sum of squared distance of the disparities and the distances for all constrained points) """ n_samples = similarities.shape[0] random_state = check_random_state(random_state) if similarities.shape[0] != similarities.shape[1]: raise ValueError("similarities must be a square array (shape=%d)" % n_samples) if not np.allclose(similarities, similarities.T): raise ValueError("similarities must be symmetric") sim_flat = ((1 - np.tri(n_samples)) * similarities).ravel() sim_flat_w = sim_flat[sim_flat != 0] if init is None: # Randomly choose initial configuration X = random_state.rand(n_samples * n_components) X = X.reshape((n_samples, n_components)) else: # overrides the parameter p n_components = init.shape[1] if n_samples != init.shape[0]: raise ValueError("init matrix should be of shape (%d, %d)" % (n_samples, n_components)) X = init old_stress = None ir = IsotonicRegression() for it in range(max_iter): # Compute distance and monotonic regression dis = euclidean_distances(X) if metric: disparities = similarities else: dis_flat = dis.ravel() # similarities with 0 are considered as missing values dis_flat_w = dis_flat[sim_flat != 0] # Compute the disparities using a monotonic regression disparities_flat = ir.fit_transform(sim_flat_w, dis_flat_w) disparities = dis_flat.copy() disparities[sim_flat != 0] = disparities_flat disparities = disparities.reshape((n_samples, n_samples)) disparities *= np.sqrt( (n_samples * (n_samples - 1) / 2) / (disparities**2).sum()) # Compute stress stress = ((dis.ravel() - disparities.ravel())**2).sum() / 2 # Update X using the Guttman transform dis[dis == 0] = 1e-5 ratio = disparities / dis B = -ratio B[np.arange(len(B)), np.arange(len(B))] += ratio.sum(axis=1) X = 1. / n_samples * np.dot(B, X) dis = np.sqrt((X**2).sum(axis=1)).sum() if verbose >= 2: print('it: %d, stress %s' % (it, stress)) if old_stress is not None: if (old_stress - stress / dis) < eps: if verbose: print('breaking at iteration %d with stress %s' % (it, stress)) break old_stress = stress / dis return X, stress
simi = cosine_similarity(em_train_x)

# In[272]:

serieslist = []
labels = []
for k in tqdm_notebook(list(train_dict_collect_industry.keys())):
    index = train_dict_collect_industry[k]
    train_x = Data[index]
    em_train_x = model.predict(train_x)
    simi = cosine_similarity(em_train_x)
    # k=-1 keeps only the strict lower triangle, i.e. each unordered pair once
    series = simi[np.tri(simi.shape[0], simi.shape[1], k=-1) == 1]
    serieslist.append(series)
    labels.append(k)

# In[143]:

fig = plt.figure(figsize=(12, 6))
# plt.xticks([2, 3, 4])
plt.boxplot(serieslist, showmeans=True, vert=True, labels=labels)

# In[155]:
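# A minimal, self-contained sketch of the triangle-indexing idiom used above: the
# boolean mask built from np.tri(..., k=-1) keeps each unordered pair of rows exactly
# once. The similarity values here are made up for illustration.
import numpy as np

simi = np.array([[1.0, 0.8, 0.3],
                 [0.8, 1.0, 0.5],
                 [0.3, 0.5, 1.0]])
mask = np.tri(*simi.shape, k=-1) == 1   # strict lower triangle as a boolean mask
print(simi[mask])                       # [0.8 0.3 0.5] -> pairs (1,0), (2,0), (2,1)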
def LLE(x, tau, n, T, fs):
    """Calculate the largest Lyapunov exponent of a given time series x using
    the Rosenstein algorithm.

    Parameters
    ----------
    x : list
        a time series
    n : integer
        embedding dimension
    tau : integer
        embedding lag
    fs : integer
        sampling frequency
    T : integer
        mean period

    Returns
    ----------
    Lexp : float
        Largest Lyapunov Exponent

    Notes
    ----------
    An n-dimensional trajectory is first reconstructed from the observed data
    using an embedding delay of tau, via the pyeeg function embed_seq(x, tau, n).
    The algorithm then searches for the nearest neighbour of each point on the
    reconstructed trajectory; the temporal separation of nearest neighbours must
    be greater than the mean period of the time series. The mean period can be
    estimated as the reciprocal of the mean frequency of the power spectrum.

    Each pair of nearest neighbours is assumed to diverge exponentially at a
    rate given by the largest Lyapunov exponent. Having a collection of
    neighbours, a least-squares fit to the average exponential divergence is
    calculated. The slope of this line gives an estimate of the largest
    Lyapunov exponent.

    References
    ----------
    Rosenstein, Michael T., James J. Collins, and Carlo J. De Luca. "A
    practical method for calculating largest Lyapunov exponents from small
    data sets." Physica D: Nonlinear Phenomena 65.1 (1993): 117-134.

    Examples
    ----------
    >>> import pyeeg
    >>> X = numpy.array([3,4,1,2,4,51,4,32,24,12,3,45])
    >>> pyeeg.LLE(X,2,4,1,1)
    0.18771136179353307
    """
    Em = embed_seq(x, tau, n)
    M = len(Em)
    A = numpy.tile(Em, (len(Em), 1, 1))
    B = numpy.transpose(A, [1, 0, 2])
    square_dists = (A - B) ** 2  # square_dists[i,j,k] = (Em[i][k]-Em[j][k])^2
    D = numpy.sqrt(square_dists.sum(axis=2))  # D[i,j] = ||Em[i]-Em[j]||_2

    # Exclude elements within T of the diagonal
    band = numpy.tri(D.shape[0], k=T) - numpy.tri(D.shape[0], k=-T - 1)
    band[band == 1] = numpy.inf
    neighbors = (D + band).argmin(axis=0)  # nearest neighbors more than T steps away

    # in_bounds[i,j] = (i+j <= M-1 and i+neighbors[j] <= M-1)
    inc = numpy.tile(numpy.arange(M), (M, 1))
    row_inds = (numpy.tile(numpy.arange(M), (M, 1)).T + inc)
    col_inds = (numpy.tile(neighbors, (M, 1)) + inc.T)
    in_bounds = numpy.logical_and(row_inds <= M - 1, col_inds <= M - 1)
    # Uncomment for old (miscounted) version
    # in_bounds = numpy.logical_and(row_inds < M - 1, col_inds < M - 1)
    row_inds[~in_bounds] = 0
    col_inds[~in_bounds] = 0

    # neighbor_dists[i,j] = ||Em[i+j]-Em[i+neighbors[j]]||_2
    neighbor_dists = numpy.ma.MaskedArray(D[row_inds, col_inds], ~in_bounds)
    J = (~neighbor_dists.mask).sum(axis=1)  # number of in-bounds indices by row
    # Set invalid (zero) values to 1; log(1) = 0 so the sum is unchanged
    neighbor_dists[neighbor_dists == 0] = 1
    d_ij = numpy.sum(numpy.log(neighbor_dists.data), axis=1)
    mean_d = d_ij[J > 0] / J[J > 0]

    x = numpy.arange(len(mean_d))
    X = numpy.vstack((x, numpy.ones(len(mean_d)))).T
    [m, c] = numpy.linalg.lstsq(X, mean_d, rcond=None)[0]
    Lexp = fs * m
    return Lexp
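# A small sketch of the band-exclusion trick used above: subtracting two np.tri
# calls yields ones within T steps of the diagonal; setting those entries to inf
# prevents the nearest-neighbour search from picking temporally adjacent points.
import numpy as np

M, T = 5, 1
band = np.tri(M, k=T) - np.tri(M, k=-T - 1)
band[band == 1] = np.inf
print(band)   # inf wherever |i - j| <= T, zeros elsewhere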
def plot_FFA_config_confusion_matrix1():
    """
    For example output, see figures-20210604.pptx
    Supplemental Figure S2
    """
    import numpy as np
    import pickle as pkl
    from matplotlib import pyplot as plt

    # inputs
    hemis = ('lh', 'rh')
    figsize = (6.4, 4.8)
    fpath = pjoin(work_dir, 'FFA_config_confusion_mat_{}.pkl')

    # outputs
    out_file = pjoin(work_dir, 'FFA_config_confusion_mat1.jpg')

    # prepare
    n_hemi = len(hemis)

    # plot
    _, axes = plt.subplots(1, n_hemi, figsize=figsize)
    for hemi_idx, hemi in enumerate(hemis):
        ax = axes[hemi_idx]
        data = pkl.load(open(fpath.format(hemi), 'rb'))
        configs = data['configuration']
        n_config = len(configs)
        ticks = np.arange(n_config)
        arr = data['matrix'] + data['matrix'].T
        diag_idx_arr = np.eye(n_config, dtype=bool)
        arr[diag_idx_arr] = arr[diag_idx_arr] / 2
        # mask the lower triangle so only the upper triangle (incl. diagonal) is drawn
        tril_mask = np.tri(n_config, k=-1)
        arr = np.ma.array(arr, mask=tril_mask)

        ax.imshow(arr, 'autumn')
        ax.tick_params(top=True, bottom=False, labeltop=True, labelbottom=False)
        ax.set_xticks(ticks)
        ax.set_xticklabels(configs)
        plt.setp(ax.get_xticklabels(), rotation=-30, ha="right",
                 rotation_mode="anchor")
        if hemi_idx == 0:
            ax.set_yticks(ticks)
            ax.set_yticklabels(configs)
        else:
            ax.set_yticks(ticks)
            ax.tick_params(left=False, labelleft=False)
        ax.spines['top'].set_visible(False)
        ax.spines['right'].set_visible(False)
        ax.spines['bottom'].set_visible(False)
        ax.spines['left'].set_visible(False)
        ax.set_xticks(np.arange(n_config) - .5, minor=True)
        ax.set_yticks(np.arange(n_config) - .5, minor=True)
        ax.grid(which="minor", color="w", linestyle='-', linewidth=3)
        ax.tick_params(which="minor", bottom=False, left=False)
        for i in range(n_config):
            for j in range(n_config):
                ax.text(j, i, arr[i, j], ha="center", va="center", color="k")

    plt.tight_layout()
    plt.savefig(out_file)
def draw_heatmap(self, xydict, **kwargs):
    if 'figname' in kwargs:
        filename = kwargs['figname']
    else:
        filename = 'heatmap.png'

    if 'not_mask' in kwargs and kwargs['not_mask']:
        plot_array = xydict
    else:
        # hide the upper triangle (transposed np.tri) so only valid cells are drawn
        plot_array = np.ma.array(xydict, mask=np.tri(len(xydict), dtype=int).T)
    if 'filter_count' in kwargs:
        assert kwargs['filter_count'] > 0, "filter_count must be positive"
        plot_array = np.ma.array(xydict, mask=(xydict < kwargs['filter_count']))

    cmap = plt.cm.jet
    # cmap = plt.get_cmap("Oranges")
    cmap.set_bad('w', 1.)

    plt.title(kwargs.get("title", "Heatmap"))
    try:
        if self.other_plot_kwargs.get('fixed_range', False):
            vmin, vmax = self.other_plot_kwargs['fixed_range']
            del self.other_plot_kwargs['fixed_range']
            img = plt.imshow(plot_array, vmin=vmin, vmax=vmax,
                             interpolation='nearest', origin='lower',
                             cmap=cmap, aspect='auto', **self.other_plot_kwargs)
        else:
            img = plt.imshow(plot_array, interpolation='nearest',
                             origin='lower', aspect='auto', cmap=cmap,
                             **self.other_plot_kwargs)

        cb = plt.colorbar(img)
        plt.tight_layout()
        plt.savefig(filename, dpi=600)
        plt.show()
        INFO("plot is saved as {}".format(filename))
        plt.clf()
        self.other_plot_kwargs.clear()
    except Exception as e:
        try:
            import time
            t = int(time.time())
            WARNING("plotting using imshow failed: {}, "
                    "now try to save the plotting data to /tmp/heatmap.{}.pickle"
                    .format(e, t))
            import pickle
            with open("/tmp/heatmap.{}.pickle".format(t), 'wb') as ofile:
                pickle.dump(plot_array, ofile)
        except Exception as e:
            WARNING("failed to save plotting data")
        try:
            plt.pcolormesh(plot_array.T, cmap=cmap)
            plt.savefig(filename)
        except Exception as e:
            WARNING("further plotting using pcolormesh failed" + str(e))
def _smacof_single( dissimilarities, metric=True, n_components=2, init=None, max_iter=300, verbose=0, eps=1e-3, random_state=None, normalized_stress=False, ): """Computes multidimensional scaling using SMACOF algorithm. Parameters ---------- dissimilarities : ndarray of shape (n_samples, n_samples) Pairwise dissimilarities between the points. Must be symmetric. metric : bool, default=True Compute metric or nonmetric SMACOF algorithm. n_components : int, default=2 Number of dimensions in which to immerse the dissimilarities. If an ``init`` array is provided, this option is overridden and the shape of ``init`` is used to determine the dimensionality of the embedding space. init : ndarray of shape (n_samples, n_components), default=None Starting configuration of the embedding to initialize the algorithm. By default, the algorithm is initialized with a randomly chosen array. max_iter : int, default=300 Maximum number of iterations of the SMACOF algorithm for a single run. verbose : int, default=0 Level of verbosity. eps : float, default=1e-3 Relative tolerance with respect to stress at which to declare convergence. The value of `eps` should be tuned separately depending on whether or not `normalized_stress` is being used. random_state : int, RandomState instance or None, default=None Determines the random number generator used to initialize the centers. Pass an int for reproducible results across multiple function calls. See :term:`Glossary <random_state>`. normalized_stress : bool, default=False Whether use and return normed stress value (Stress-1) instead of raw stress calculated by default. Only supported in non-metric MDS. The caller must ensure that if `normalized_stress=True` then `metric=False` .. versionadded:: 1.2 Returns ------- X : ndarray of shape (n_samples, n_components) Coordinates of the points in a ``n_components``-space. stress : float The final value of the stress (sum of squared distance of the disparities and the distances for all constrained points). If `normalized_stress=True`, and `metric=False` returns Stress-1. A value of 0 indicates "perfect" fit, 0.025 excellent, 0.05 good, 0.1 fair, and 0.2 poor [1]_. n_iter : int The number of iterations corresponding to the best stress. References ---------- .. [1] "Nonmetric multidimensional scaling: a numerical method" Kruskal, J. Psychometrika, 29 (1964) .. [2] "Multidimensional scaling by optimizing goodness of fit to a nonmetric hypothesis" Kruskal, J. Psychometrika, 29, (1964) .. [3] "Modern Multidimensional Scaling - Theory and Applications" Borg, I.; Groenen P. 
Springer Series in Statistics (1997) """ dissimilarities = check_symmetric(dissimilarities, raise_exception=True) n_samples = dissimilarities.shape[0] random_state = check_random_state(random_state) sim_flat = ((1 - np.tri(n_samples)) * dissimilarities).ravel() sim_flat_w = sim_flat[sim_flat != 0] if init is None: # Randomly choose initial configuration X = random_state.uniform(size=n_samples * n_components) X = X.reshape((n_samples, n_components)) else: # overrides the parameter p n_components = init.shape[1] if n_samples != init.shape[0]: raise ValueError( "init matrix should be of shape (%d, %d)" % (n_samples, n_components) ) X = init old_stress = None ir = IsotonicRegression() for it in range(max_iter): # Compute distance and monotonic regression dis = euclidean_distances(X) if metric: disparities = dissimilarities else: dis_flat = dis.ravel() # dissimilarities with 0 are considered as missing values dis_flat_w = dis_flat[sim_flat != 0] # Compute the disparities using a monotonic regression disparities_flat = ir.fit_transform(sim_flat_w, dis_flat_w) disparities = dis_flat.copy() disparities[sim_flat != 0] = disparities_flat disparities = disparities.reshape((n_samples, n_samples)) disparities *= np.sqrt( (n_samples * (n_samples - 1) / 2) / (disparities**2).sum() ) # Compute stress stress = ((dis.ravel() - disparities.ravel()) ** 2).sum() / 2 if normalized_stress: stress = np.sqrt(stress / ((disparities.ravel() ** 2).sum() / 2)) # Update X using the Guttman transform dis[dis == 0] = 1e-5 ratio = disparities / dis B = -ratio B[np.arange(len(B)), np.arange(len(B))] += ratio.sum(axis=1) X = 1.0 / n_samples * np.dot(B, X) dis = np.sqrt((X**2).sum(axis=1)).sum() if verbose >= 2: print("it: %d, stress %s" % (it, stress)) if old_stress is not None: if (old_stress - stress / dis) < eps: if verbose: print("breaking at iteration %d with stress %s" % (it, stress)) break old_stress = stress / dis return X, stress, it + 1
def _smacof_single( dissimilarities, metric=True, n_components=2, init=None, max_iter=300, verbose=0, eps=1e-3, random_state=None, ): """Computes multidimensional scaling using SMACOF algorithm. Parameters ---------- dissimilarities : ndarray of shape (n_samples, n_samples) Pairwise dissimilarities between the points. Must be symmetric. metric : bool, default=True Compute metric or nonmetric SMACOF algorithm. n_components : int, default=2 Number of dimensions in which to immerse the dissimilarities. If an ``init`` array is provided, this option is overridden and the shape of ``init`` is used to determine the dimensionality of the embedding space. init : ndarray of shape (n_samples, n_components), default=None Starting configuration of the embedding to initialize the algorithm. By default, the algorithm is initialized with a randomly chosen array. max_iter : int, default=300 Maximum number of iterations of the SMACOF algorithm for a single run. verbose : int, default=0 Level of verbosity. eps : float, default=1e-3 Relative tolerance with respect to stress at which to declare convergence. random_state : int, RandomState instance or None, default=None Determines the random number generator used to initialize the centers. Pass an int for reproducible results across multiple function calls. See :term: `Glossary <random_state>`. Returns ------- X : ndarray of shape (n_samples, n_components) Coordinates of the points in a ``n_components``-space. stress : float The final value of the stress (sum of squared distance of the disparities and the distances for all constrained points). n_iter : int The number of iterations corresponding to the best stress. """ dissimilarities = check_symmetric(dissimilarities, raise_exception=True) n_samples = dissimilarities.shape[0] random_state = check_random_state(random_state) sim_flat = ((1 - np.tri(n_samples)) * dissimilarities).ravel() sim_flat_w = sim_flat[sim_flat != 0] if init is None: # Randomly choose initial configuration X = random_state.rand(n_samples * n_components) X = X.reshape((n_samples, n_components)) else: # overrides the parameter p n_components = init.shape[1] if n_samples != init.shape[0]: raise ValueError("init matrix should be of shape (%d, %d)" % (n_samples, n_components)) X = init old_stress = None ir = IsotonicRegression() for it in range(max_iter): # Compute distance and monotonic regression dis = euclidean_distances(X) if metric: disparities = dissimilarities else: dis_flat = dis.ravel() # dissimilarities with 0 are considered as missing values dis_flat_w = dis_flat[sim_flat != 0] # Compute the disparities using a monotonic regression disparities_flat = ir.fit_transform(sim_flat_w, dis_flat_w) disparities = dis_flat.copy() disparities[sim_flat != 0] = disparities_flat disparities = disparities.reshape((n_samples, n_samples)) disparities *= np.sqrt( (n_samples * (n_samples - 1) / 2) / (disparities**2).sum()) # Compute stress stress = ((dis.ravel() - disparities.ravel())**2).sum() / 2 # Update X using the Guttman transform dis[dis == 0] = 1e-5 ratio = disparities / dis B = -ratio B[np.arange(len(B)), np.arange(len(B))] += ratio.sum(axis=1) X = 1.0 / n_samples * np.dot(B, X) dis = np.sqrt((X**2).sum(axis=1)).sum() if verbose >= 2: print("it: %d, stress %s" % (it, stress)) if old_stress is not None: if (old_stress - stress / dis) < eps: if verbose: print("breaking at iteration %d with stress %s" % (it, stress)) break old_stress = stress / dis return X, stress, it + 1
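# In the SMACOF implementations above, (1 - np.tri(n_samples)) zeroes the diagonal
# and lower triangle of the dissimilarity matrix, so each pairwise value is kept
# exactly once and the remaining zeros can be treated as missing. A minimal sketch
# of that step; the dissimilarity matrix below is made up for illustration.
import numpy as np

diss = np.array([[0.0, 2.0, 4.0],
                 [2.0, 0.0, 1.0],
                 [4.0, 1.0, 0.0]])
n = diss.shape[0]
sim_flat = ((1 - np.tri(n)) * diss).ravel()   # strict upper triangle, flattened
sim_flat_w = sim_flat[sim_flat != 0]          # values fed to the isotonic regression
print(sim_flat_w)                             # [2. 4. 1.]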
def __init__(self, num_emb, batch_size, emb_dim, hidden_dim, sequence_length, start_token, mid_layer, learning_rate=0.005): self.num_emb = num_emb self.batch_size = batch_size self.emb_dim = emb_dim self.hidden_dim = hidden_dim self.sequence_length = sequence_length self.start_token = tf.constant([start_token] * self.batch_size, dtype=tf.int32) self.learning_rate = tf.Variable(float(learning_rate), trainable=False) self.g_params = [] self.grad_clip = 5.0 self.mid_layer = mid_layer self.expected_reward = tf.Variable(tf.zeros([self.sequence_length])) with tf.variable_scope('generator'): self.g_embeddings = tf.Variable( self.init_matrix([self.num_emb, self.emb_dim])) self.g_params.append(self.g_embeddings) self.g_recurrent_unit = self.create_recurrent_unit( self.g_params) # maps h_tm1 to h_t for generator self.g_output_unit = self.create_output_unit( self.g_params, mid_layer) # maps h_t to o_t (output token logits) # placeholder definition self.x = tf.placeholder(tf.int32, shape=[ self.batch_size, self.sequence_length ]) # sequence of tokens generated by generator self.off_policy_prob = tf.placeholder( tf.float32, shape=[self.batch_size, self.sequence_length], name='off_policy_prob') self.baseline = tf.placeholder(tf.float32, shape=[self.sequence_length], name='baseline') self.rewards = tf.placeholder( tf.float32, shape=[self.batch_size, self.sequence_length], name='rewards') self.decay_weight = tf.placeholder(tf.float32, name="decay_weight") # processed for batch # self.f_weight = tf.Print(self.f_weight, [self.f_weight[-3:]], message='='*10) with tf.device("/cpu:0"): self.word = tf.nn.embedding_lookup(self.g_embeddings, self.x) self.processed_x = tf.transpose( self.word, perm=[1, 0, 2]) # seq_length x batch_size x emb_dim # Initial states self.h0 = tf.zeros([self.batch_size, self.hidden_dim]) self.h0 = tf.stack([self.h0, self.h0]) gen_o = tensor_array_ops.TensorArray(dtype=tf.float32, size=self.sequence_length, dynamic_size=False, infer_shape=True) gen_x = tensor_array_ops.TensorArray(dtype=tf.int32, size=self.sequence_length, dynamic_size=False, infer_shape=True) gen_h = tensor_array_ops.TensorArray(dtype=tf.float32, size=self.sequence_length, dynamic_size=False, infer_shape=True) def _g_recurrence(i, x_t, h_tm1, gen_o, gen_x): h_t = self.g_recurrent_unit(x_t, h_tm1) # hidden_memory_tuple o_t = self.g_output_unit(h_t) # batch x vocab , logits not prob log_prob = tf.log(tf.nn.softmax(o_t)) next_token = tf.cast( tf.reshape(tf.multinomial(log_prob, 1), [self.batch_size]), tf.int32) x_tp1 = tf.nn.embedding_lookup(self.g_embeddings, next_token) # batch x emb_dim gen_o = gen_o.write( i, tf.reduce_sum( tf.multiply(tf.one_hot(next_token, self.num_emb, 1.0, 0.0), tf.nn.softmax(o_t)), 1)) # [batch_size] , prob gen_x = gen_x.write(i, next_token) # indices, batch_size return i + 1, x_tp1, h_t, gen_o, gen_x _, _, _, self.gen_o, self.gen_x = control_flow_ops.while_loop( cond=lambda i, _1, _2, _3, _4: i < self.sequence_length, body=_g_recurrence, loop_vars=(tf.constant(0, dtype=tf.int32), tf.nn.embedding_lookup(self.g_embeddings, self.start_token), self.h0, gen_o, gen_x)) self.gen_x = self.gen_x.stack() # seq_length x batch_size self.gen_x = tf.transpose(self.gen_x, perm=[1, 0]) # batch_size x seq_length # supervised pretraining for generator g_predictions = tensor_array_ops.TensorArray(dtype=tf.float32, size=self.sequence_length, dynamic_size=False, infer_shape=True) ta_emb_x = tensor_array_ops.TensorArray(dtype=tf.float32, size=self.sequence_length) ta_emb_x = ta_emb_x.unstack(self.processed_x) def 
_pretrain_recurrence(i, x_t, h_tm1, g_predictions, gen_h): gen_h = gen_h.write(i, tf.unstack(h_tm1)[0]) h_t = self.g_recurrent_unit(x_t, h_tm1) o_t = self.g_output_unit(h_t) g_predictions = g_predictions.write( i, tf.nn.softmax(o_t)) # batch x vocab_size x_tp1 = ta_emb_x.read(i) return i + 1, x_tp1, h_t, g_predictions, gen_h _, _, _, self.g_predictions, self.gen_h = control_flow_ops.while_loop( cond=lambda i, _1, _2, _3, _4: i < self.sequence_length, body=_pretrain_recurrence, loop_vars=(tf.constant(0, dtype=tf.int32), tf.nn.embedding_lookup(self.g_embeddings, self.start_token), self.h0, g_predictions, gen_h)) # CalculateMean cross-entropy loss self.g_predictions = tf.transpose( self.g_predictions.stack(), perm=[1, 0, 2]) # batch_size x seq_length x vocab_size # self.log_pred = tf.one_hot(tf.to_int32(tf.reshape(self.x, [-1])), self.num_emb, 1.0, 0.0) * \ # tf.log(tf.reshape(self.g_predictions, [-1, self.num_emb])) # clip_log_pred & log_pred : batch*seq x vocab_size self.clipped_log_pred = tf.one_hot( tf.to_int32(tf.reshape(self.x, [-1])), self.num_emb, 1.0, 0.0) * tf.log( tf.clip_by_value( tf.reshape(self.g_predictions, [-1, self.num_emb]), 1e-20, 1.0)) self.sent_log = tf.reduce_sum(tf.reshape( tf.reduce_sum(self.clipped_log_pred, -1), [self.batch_size, self.sequence_length]), axis=1) # pretraining loss self.pretrain_loss = -tf.reduce_sum(self.clipped_log_pred) / ( self.sequence_length * self.batch_size) # training updates pretrain_opt = self.optimizer(self.learning_rate) self.pretrain_grad, _ = tf.clip_by_global_norm( tf.gradients(self.pretrain_loss, self.g_params), self.grad_clip) self.pretrain_updates = pretrain_opt.apply_gradients( zip(self.pretrain_grad, self.g_params)) ####################################################################################################### # Unsupervised Training ####################################################################################################### log_pred = tf.reduce_sum(self.clipped_log_pred, -1) # log_pred: batch * seq (1 dim) bz_log_pred = tf.reshape(log_pred, [self.batch_size, self.sequence_length]) #sig_bz_log_pred = tf.nn.sigmoid(tf.reshape(log_pred, [self.batch_size, self.sequence_length])) sig_bz_log_pred = tf.reshape(log_pred, [self.batch_size, self.sequence_length]) accumlated_pred = tf.matmul( sig_bz_log_pred, tf.constant(np.tri(self.sequence_length), dtype=tf.float32)) accumlated_pred = tf.stop_gradient(accumlated_pred) ratio = tf.exp(bz_log_pred - self.off_policy_prob) # ratio = tf.Print(ratio, [ratio[:2]], message='*'*10, summarize=100) clipped_ratio = tf.clip_by_value(ratio, 0.8, 1.2) choice_a = ratio * ( self.rewards - accumlated_pred * self.decay_weight - self.baseline) choice_b = clipped_ratio * ( self.rewards - accumlated_pred * self.decay_weight - self.baseline) self.g_loss = -tf.reduce_mean(tf.minimum(choice_a, choice_b)) g_opt = self.optimizer(self.learning_rate) self.g_grad, _ = tf.clip_by_global_norm( tf.gradients(self.g_loss, self.g_params), self.grad_clip) self.g_updates = g_opt.apply_gradients(zip(self.g_grad, self.g_params))
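# A hedged numpy sketch of what the matmul with np.tri(sequence_length) above
# computes: for each position t, the sum of per-token scores from t to the end of
# the sequence (a reversed cumulative sum). The scores below are made up.
import numpy as np

T = 4
scores = np.array([[1.0, 2.0, 3.0, 4.0]])   # hypothetical (batch=1, seq_len=4) log-probs
acc = scores @ np.tri(T)                    # tri[s, t] = 1 when t <= s
print(acc)                                  # [[10.  9.  7.  4.]]
# equivalent to np.cumsum(scores[:, ::-1], axis=1)[:, ::-1]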
def _to_triangular(j, h):
    # fold the diagonal of the coupling matrix j into the fields h, then keep each
    # symmetrised coupling once, in the strict upper triangle
    h = h + j.diagonal()
    j = (1 - np.tri(h.size)) * (j + j.T)
    return j, h
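# A hedged usage sketch of _to_triangular above (assuming it is in scope), with
# made-up couplings j and zero fields h: the diagonal of j is folded into h, and
# each symmetrised coupling is kept once in the strict upper triangle.
import numpy as np

j = np.array([[0.5, 1.0, 0.0],
              [1.0, 0.5, 2.0],
              [0.0, 0.0, 0.5]])
h = np.zeros(3)
j_tri, h_new = _to_triangular(j, h)
print(h_new)   # [0.5 0.5 0.5]
print(j_tri)   # [[0. 2. 0.]
               #  [0. 0. 2.]
               #  [0. 0. 0.]]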
def convert_calendar_decimal(year, month, day=None, hour=None, minute=None, second=None, DofY=None): """ Converts from calendar date into decimal years taking into account leap years Dershowitz, N. and E.M. Reingold. 2008. Calendrical Calculations. Cambridge: Cambridge University Press. Arguments --------- year: calendar year month: calendar month Keyword arguments ----------------- day: day of the month hour: hour of the day minute: minute of the hour second: second of the minute DofY: day of the year (January 1 = 1) Returns ------- t_date: date in decimal-year format """ #-- number of dates n_dates = len(np.atleast_1d(year)) #-- create arrays for calendar date variables cal_date = {} cal_date['year'] = np.zeros((n_dates)) cal_date['month'] = np.zeros((n_dates)) cal_date['day'] = np.zeros((n_dates)) cal_date['hour'] = np.zeros((n_dates)) cal_date['minute'] = np.zeros((n_dates)) cal_date['second'] = np.zeros((n_dates)) #-- day of the year cal_date['DofY'] = np.zeros((n_dates)) #-- remove singleton dimensions and use year and month cal_date['year'][:] = np.squeeze(year) cal_date['month'][:] = np.squeeze(month) #-- create output date variable t_date = np.zeros((n_dates)) #-- days per month in a leap and a standard year #-- only difference is February (29 vs. 28) dpm_leap = np.array([31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31], dtype=np.float) dpm_stnd = np.array([31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31], dtype=np.float) #-- Rules in the Gregorian calendar for a year to be a leap year: #-- divisible by 4, but not by 100 unless divisible by 400 #-- True length of the year is about 365.2422 days #-- Adding a leap day every four years ==> average 365.25 #-- Subtracting a leap year every 100 years ==> average 365.24 #-- Adding a leap year back every 400 years ==> average 365.2425 #-- Subtracting a leap year every 4000 years ==> average 365.24225 m4 = (cal_date['year'] % 4) m100 = (cal_date['year'] % 100) m400 = (cal_date['year'] % 400) m4000 = (cal_date['year'] % 4000) #-- find indices for standard years and leap years using criteria leap, = np.nonzero((m4 == 0) & (m100 != 0) | (m400 == 0) & (m4000 != 0)) stnd, = np.nonzero((m4 != 0) | (m100 == 0) & (m400 != 0) | (m4000 == 0)) #-- calculate the day of the year if DofY is not None: #-- if entered directly as an input #-- remove 1 so day 1 (Jan 1st) = 0.0 in decimal format cal_date['DofY'][:] = np.squeeze(DofY) - 1 else: #-- use calendar month and day of the month to calculate day of the year #-- month minus 1: January = 0, February = 1, etc (indice of month) #-- in decimal form: January = 0.0 month_m1 = np.array(cal_date['month'], dtype=np.int) - 1 #-- day of month if day is not None: #-- remove 1 so 1st day of month = 0.0 in decimal format cal_date['day'][:] = np.squeeze(day) - 1.0 else: #-- if not entering days as an input #-- will use the mid-month value cal_date['day'][leap] = dpm_leap[month_m1[leap]] / 2.0 cal_date['day'][stnd] = dpm_stnd[month_m1[stnd]] / 2.0 #-- create matrix with the lower half = 1 #-- this matrix will be used in a matrix multiplication #-- to calculate the total number of days for prior months #-- the -1 will make the diagonal == 0 #-- i.e. 
first row == all zeros and the #-- last row == ones for all but the last element mon_mat = np.tri(12, 12, -1) #-- using a dot product to calculate total number of days #-- for the months before the input date #-- basically is sum(i*dpm) #-- where i is 1 for all months < the month of interest #-- and i is 0 for all months >= the month of interest #-- month of interest is zero as the exact days will be #-- used to calculate the date #-- calculate the day of the year for leap and standard #-- use total days of all months before date #-- and add number of days before date in month cal_date['DofY'][stnd] = cal_date['day'][stnd] + \ np.dot(mon_mat[month_m1[stnd],:],dpm_stnd) cal_date['DofY'][leap] = cal_date['day'][leap] + \ np.dot(mon_mat[month_m1[leap],:],dpm_leap) #-- hour of day (else is zero) if hour is not None: cal_date['hour'][:] = np.squeeze(hour) #-- minute of hour (else is zero) if minute is not None: cal_date['minute'][:] = np.squeeze(minute) #-- second in minute (else is zero) if second is not None: cal_date['second'][:] = np.squeeze(second) #-- calculate decimal date #-- convert hours, minutes and seconds into days #-- convert calculated fractional days into decimal fractions of the year #-- Leap years t_date[leap] = cal_date['year'][leap] + \ (cal_date['DofY'][leap] + cal_date['hour'][leap]/24. + \ cal_date['minute'][leap]/1440. + \ cal_date['second'][leap]/86400.)/np.sum(dpm_leap) #-- Standard years t_date[stnd] = cal_date['year'][stnd] + \ (cal_date['DofY'][stnd] + cal_date['hour'][stnd]/24. + \ cal_date['minute'][stnd]/1440. + \ cal_date['second'][stnd]/86400.)/np.sum(dpm_stnd) return t_date
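# A small sketch of the np.tri(12, 12, -1) step above: dotting row m of this
# lower-triangular matrix with the days-per-month array gives the total number of
# days in all months before month m (January = index 0).
import numpy as np

dpm_stnd = np.array([31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31], dtype=float)
mon_mat = np.tri(12, 12, -1)            # row m has ones in columns 0 .. m-1
days_before = np.dot(mon_mat, dpm_stnd)
print(days_before[:4])                  # [ 0. 31. 59. 90.] -> before Jan, Feb, Mar, Apr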
def fit_3_layer_profile(depths, crr_n15s, fitting_values, crr_non_liq=0.6, max_depth=20):
    """
    Determines the best 3-layer profile fit for a given parameter.

    The method brute-forces different CRR values; for each CRR it determines the
    change points and then evaluates which change points are best. The CRR of the
    other (non-liquefying) layers is capped at `crr_non_liq`; 0.6 is a good choice
    as it is approximately the upper limit of liquefiable soil. With this cap, the
    error for over-fitting (defining a non-liquefying layer as liquefiable) is
    0.6 - crr_liq, under-fitting layers with CRR less than or equal to crr_liq
    incurs the same error, and layers between crr_liq and the non-liquefying limit
    incur an error proportional to the difference between the defined and
    calculated CRR.

    Parameters
    ----------
    depths : array
        distance from surface
    crr_n15s : array
        actual values to fit to (note CRR_max=4)
    fitting_values : array
        possible values of layer 2
    crr_non_liq : float
        value for layers 1 and 3
    :return:
    """
    if depths[-1] > max_depth:
        indy = np.where(depths > max_depth)[0][0]
    else:
        indy = len(depths)
    std_depths = depths[:indy]
    std_crr_n15s = crr_n15s[:indy]
    n_depths = len(std_depths)

    capped_values = np.clip(std_crr_n15s, None, crr_non_liq)
    diffs = capped_values[:, np.newaxis] - fitting_values[np.newaxis, :]
    cdiffs = np.cumsum(np.abs(diffs), axis=0)

    diffs = []
    h_crusts = []
    h_liqs = []
    for ii in range(len(fitting_values)):
        eline = cdiffs[:, 0] - cdiffs[:, ii]
        peak_ids = eqsig.get_peak_array_indices(eline)
        if eline[peak_ids[1]] > 0:
            peak_ids = peak_ids[:-1]
        else:
            peak_ids = peak_ids[1:-1]
        if len(peak_ids):
            loc_min_is = np.take(peak_ids, np.arange(0, len(peak_ids), 2))
            loc_max_is = np.take(peak_ids, np.arange(1, len(peak_ids), 2))
            loc_mins = np.take(eline, loc_min_is)
            loc_maxs = np.take(eline, loc_max_is)
            opts = loc_mins[np.newaxis, :] - loc_maxs[:, np.newaxis]
            opts *= np.tri(*opts.shape)  # remove cases where liq layer is higher than crust
            min_i, min_j = np.unravel_index(opts.argmin(), opts.shape)
            i_crust = loc_min_is[min_j]
            i_liq = loc_max_is[min_i]
            h_crusts.append(depths[i_crust + 1])
            h_liqs.append(depths[i_liq + 1] - depths[i_crust + 1])
            refined_errors = cdiffs[i_crust][0] + (cdiffs[i_liq][ii] - cdiffs[i_crust][ii]) \
                + (cdiffs[-1][0] - cdiffs[i_liq][0])
            diffs.append(refined_errors)
        else:
            h_crusts.append(depths[-1])
            h_liqs.append(0)
            diffs.append(1e6)

    i_best = np.argmin(diffs)
    h_crust = h_crusts[i_best]
    h_liq = h_liqs[i_best]
    p_value = fitting_values[i_best]
    diff = diffs[i_best]
    normed_diff = diff / (n_depths * crr_non_liq)
    return h_crust, h_liq, p_value, normed_diff
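# A minimal sketch of the `opts *= np.tri(*opts.shape)` masking step above:
# multiplying by the lower-triangular ones matrix zeroes the strict upper triangle,
# discarding the combinations where the liquefied layer would sit above the crust.
# The error values below are illustrative only.
import numpy as np

opts = np.array([[-3.0, -5.0, -2.0],
                 [-1.0, -4.0, -6.0],
                 [-2.0, -1.0, -7.0]])
opts *= np.tri(*opts.shape)
print(opts)                                                  # upper triangle is now zero
min_i, min_j = np.unravel_index(opts.argmin(), opts.shape)
print(min_i, min_j)                                          # -> 2 2, the most negative remaining entry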
def test_tri_1(self):
    print(np.tri(3, 5, 2, dtype=int))
    print("***********")
    print(np.tri(3, 5, -1))
def generate_flattened_label_mask(labels):
    # pairwise "same label" matrix: mask[i, j] = 1 if labels[i] == labels[j]
    v, h = np.meshgrid(labels, labels)
    mask = (v == h).astype(np.uint8)
    # keep each pair once by indexing the strict lower triangle
    indices = np.where(np.tri(len(labels), k=-1))
    return mask[indices]
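# A quick usage sketch of the function above (assuming it is in scope) with a
# made-up label vector; each entry of the returned vector corresponds to one
# unordered pair of samples, in the order (1,0), (2,0), (2,1), ...
import numpy as np

labels = np.array([0, 0, 1])                  # hypothetical class labels for 3 samples
print(generate_flattened_label_mask(labels))  # [1 0 0]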
def _speeds_of_params(self, int_speeds=False, fast_slow=False): """ Separates the sampled parameters in blocks according to the likelihood (or theory) re-evaluation that changing each one of them involves. Using the appoximate speed (i.e. inverse evaluation time in seconds) of each likelihood, sorts the blocks in an optimal way, in ascending order of speed *per full block iteration*. Returns tuples of ``(speeds), (params_in_block)``, sorted by ascending speeds, where speeds are *per param* (though optimal blocking is chosen by speed *per full block*). If ``int_speeds=True``, returns integer speeds, instead of speeds in 1/s. If ``fast_slow=True``, returns just 2 blocks: a fast and a slow one, each one assigned its slowest per-parameter speed. """ # Fill unknown speeds with the value of the slowest one, and clip with overhead speeds = np.array([getattr(self[like], "speed", -1) for like in self] + ([getattr(self.theory, "speed", -1)] if self.theory else []), dtype=float) # Add overhead to the defined ones, and clip to the slowest the undefined ones speeds[speeds > 0] = (speeds[speeds > 0] ** -1 + self.overhead) ** -1 try: speeds = np.clip(speeds, min(speeds[speeds > 0]), None) except ValueError: # No speeds specified speeds = np.ones(len(speeds)) likes = list(self) + ([_theory] if self.theory else []) for i, like in enumerate(likes): self[like].speed = speeds[i] # Compute "footprint" # i.e. likelihoods (and theory) that we must recompute when each parameter changes footprints = np.zeros((len(self.sampled_like_dependence), len(likes)), dtype=int) for i, ls in enumerate(self.sampled_like_dependence.values()): for j, like in enumerate(likes): footprints[i, j] = like in ls # Group parameters by footprint different_footprints = list(set([tuple(row) for row in footprints.tolist()])) blocks = [[p for ip, p in enumerate(self.sampled_like_dependence) if all(footprints[ip] == fp)] for fp in different_footprints] # Find optimal ordering, such that one minimises the time it takes to vary every # parameter, one by one, in a basis in which they are mixed-down (i.e after a # Cholesky transformation) # To do that, compute that "total cost" for every permutation of the blocks order, # and find the minumum. n_params_per_block = np.array([len(b) for b in blocks]) self._costs = 1 / np.array(speeds) self._footprints = np.array(different_footprints) self._lower = np.tri(len(n_params_per_block)) def get_cost_per_param_per_block(ordering): """ Computes cumulative cost per parameter for each block, given ordering. """ footprints_chol = np.minimum( 1, self._footprints[ordering].T.dot(self._lower).T) return footprints_chol.dot(self._costs) orderings = list(permutations(np.arange(len(n_params_per_block)))) costs_per_param_per_block = np.array( [get_cost_per_param_per_block(list(o)) for o in orderings]) total_costs = np.array( [n_params_per_block[list(o)].dot(costs_per_param_per_block[i]) for i, o in enumerate(orderings)]) i_optimal = np.argmin(total_costs) optimal_ordering = orderings[i_optimal] blocks = [blocks[i] for i in optimal_ordering] costs_per_param_per_block = costs_per_param_per_block[i_optimal] # This costs are *cumulative-down* (i.e. take into account the cost of varying the # parameters below the present one). Subtract that effect so that its inverse, # the speeds, are equivalent to oversampling factors costs_per_param_per_block[:-1] -= costs_per_param_per_block[1:] params_speeds = 1 / costs_per_param_per_block if int_speeds: # Oversampling precision: smallest difference in oversampling to be ignored. 
speed_precision = 1 / 10 speeds = np.array(np.round(np.array( params_speeds) / min(params_speeds) / speed_precision), dtype=int) params_speeds = np.array( speeds / np.ufunc.reduce(np.frompyfunc(gcd, 2, 1), speeds), dtype=int) self.log.debug("Optimal ordering of parameter blocks: %r with speeds %r", blocks, params_speeds) # Fast-slow separation: chooses separation that maximizes log-difference in speed # (speed per parameter in a combination of blocks is the slowest one) if fast_slow: if len(blocks) > 1: log_differences = np.zeros(len(blocks) - 1) for i in range(len(blocks) - 1): log_differences[i] = (np.log(np.min(params_speeds[:i + 1])) - np.log(np.min(params_speeds[i + 1:]))) i_max = np.argmin(log_differences) blocks = ( lambda l: [list(chain(*l[:i_max + 1])), list(chain(*l[i_max + 1:]))])(blocks) # In this case, speeds must be *cumulative*, since I am squashing blocks cum_inv = lambda ss: 1 / (sum(1 / ss)) params_speeds = ( lambda l: [cum_inv(l[:i_max + 1]), cum_inv(l[i_max + 1:])])(params_speeds) self.log.debug("Fast-slow blocking: %r with speeds %r", blocks, params_speeds) else: self.log.warning("Requested fast/slow separation, " "but all pararameters have the same speed.") return params_speeds, blocks
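# A toy numpy sketch of the cumulative-footprint step performed by
# get_cost_per_param_per_block above; the footprints and per-likelihood costs are
# made up. Right-multiplying the transposed footprint matrix by np.tri accumulates,
# for each block, every likelihood touched by that block or by any block ordered
# after it (mimicking parameter mixing after a Cholesky transformation).
import numpy as np

footprints = np.array([[1, 0],
                       [0, 1],
                       [0, 1]])            # 3 parameter blocks x 2 likelihoods
costs = np.array([0.1, 2.0])               # hypothetical evaluation cost per likelihood (s)
lower = np.tri(len(footprints))
cumulative = np.minimum(1, footprints.T.dot(lower).T)
print(cumulative.dot(costs))               # [2.1 2.  2. ] -> cost per parameter of each block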
import numpy as np
import pymc3 as pm
from scipy import stats
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

plt.style.use('seaborn-darkgrid')

iris = sns.load_dataset('iris', data_home="./BAP")
corr = iris[iris['species'] != 'virginica'].corr()
# hide the upper triangle (including the diagonal) so each correlation appears once
mask = np.tri(*corr.shape).T
sns.heatmap(corr, mask=mask, annot=True, cmap='viridis')
plt.savefig('img509.png')
def main(N=72, seed=42, mprocs=2, nprocs=2, dtype=float): gen = np.random.RandomState(seed) grid = BlacsGrid(world, mprocs, nprocs) if (dtype == complex): epsilon = 1.0j else: epsilon = 0.0 # Create descriptors for matrices on master: glob = grid.new_descriptor(N, N, N, N) # print globA.asarray() # Populate matrices local to master: H0 = glob.zeros(dtype=dtype) + gen.rand(*glob.shape) S0 = glob.zeros(dtype=dtype) + gen.rand(*glob.shape) C0 = glob.empty(dtype=dtype) if rank == 0: # Complex case must have real numbers on the diagonal. # We make a simple complex Hermitian matrix below. H0 = H0 + epsilon * (0.1 * np.tri(N, N, k=-N // nprocs) + 0.3 * np.tri(N, N, k=-1)) S0 = S0 + epsilon * (0.2 * np.tri(N, N, k=-N // nprocs) + 0.4 * np.tri(N, N, k=-1)) # Make matrices symmetric rk(1.0, H0.copy(), 0.0, H0) rk(1.0, S0.copy(), 0.0, S0) # Overlap matrix must be semi-positive definite S0 = S0 + 50.0 * np.eye(N, N, 0) # Hamiltonian is usually diagonally dominant H0 = H0 + 75.0 * np.eye(N, N, 0) C0 = S0.copy() S0_inv = S0.copy() # Local result matrices W0 = np.empty((N), dtype=float) W0_g = np.empty((N), dtype=float) # Calculate eigenvalues / other serial results if rank == 0: diagonalize(H0.copy(), W0) general_diagonalize(H0.copy(), W0_g, S0.copy()) inverse_cholesky(C0) # result returned in lower triangle tri2full(S0_inv, 'L') S0_inv = inv(S0_inv) # tri2full(C0) # symmetrize assert glob.check(H0) and glob.check(S0) and glob.check(C0) # Create distributed destriptors with various block sizes: dist = grid.new_descriptor(N, N, 8, 8) # Distributed matrices: # We can use empty here, but end up with garbage on # on the other half of the triangle when we redistribute. # This is fine because ScaLAPACK does not care. H = dist.empty(dtype=dtype) S = dist.empty(dtype=dtype) Sinv = dist.empty(dtype=dtype) Z = dist.empty(dtype=dtype) C = dist.empty(dtype=dtype) Sinv = dist.empty(dtype=dtype) # Eigenvalues are non-BLACS matrices W = np.empty((N), dtype=float) W_dc = np.empty((N), dtype=float) W_mr3 = np.empty((N), dtype=float) W_g = np.empty((N), dtype=float) W_g_dc = np.empty((N), dtype=float) W_g_mr3 = np.empty((N), dtype=float) Glob2dist = Redistributor(world, glob, dist) Glob2dist.redistribute(H0, H, uplo='L') Glob2dist.redistribute(S0, S, uplo='L') Glob2dist.redistribute(S0, C, uplo='L') # C0 was previously overwritten Glob2dist.redistribute(S0, Sinv, uplo='L') # we don't test the expert drivers anymore since there # might be a buffer overflow error ## scalapack_diagonalize_ex(dist, H.copy(), Z, W, 'L') scalapack_diagonalize_dc(dist, H.copy(), Z, W_dc, 'L') ## scalapack_diagonalize_mr3(dist, H.copy(), Z, W_mr3, 'L') ## scalapack_general_diagonalize_ex(dist, H.copy(), S.copy(), Z, W_g, 'L') scalapack_general_diagonalize_dc(dist, H.copy(), S.copy(), Z, W_g_dc, 'L') ## scalapack_general_diagonalize_mr3(dist, H.copy(), S.copy(), Z, W_g_mr3, 'L') scalapack_inverse_cholesky(dist, C, 'L') if dtype == complex: # Only supported for complex for now scalapack_inverse(dist, Sinv, 'L') # Undo redistribute C_test = glob.empty(dtype=dtype) Sinv_test = glob.empty(dtype=dtype) Dist2glob = Redistributor(world, dist, glob) Dist2glob.redistribute(C, C_test) Dist2glob.redistribute(Sinv, Sinv_test) if rank == 0: ## diag_ex_err = abs(W - W0).max() diag_dc_err = abs(W_dc - W0).max() ## diag_mr3_err = abs(W_mr3 - W0).max() ## general_diag_ex_err = abs(W_g - W0_g).max() general_diag_dc_err = abs(W_g_dc - W0_g).max() ## general_diag_mr3_err = abs(W_g_mr3 - W0_g).max() inverse_chol_err = abs(C_test - C0).max() tri2full(Sinv_test, 'L') 
inverse_err = abs(Sinv_test - S0_inv).max() ## print 'diagonalize ex err', diag_ex_err print('diagonalize dc err', diag_dc_err) ## print 'diagonalize mr3 err', diag_mr3_err ## print 'general diagonalize ex err', general_diag_ex_err print('general diagonalize dc err', general_diag_dc_err) ## print 'general diagonalize mr3 err', general_diag_mr3_err print('inverse chol err', inverse_chol_err) if dtype == complex: print('inverse err', inverse_err) else: ## diag_ex_err = 0.0 diag_dc_err = 0.0 ## diag_mr3_err = 0.0 ## general_diag_ex_err = 0.0 general_diag_dc_err = 0.0 ## general_diag_mr3_err = 0.0 inverse_chol_err = 0.0 inverse_err = 0.0 # We don't like exceptions on only one cpu ## diag_ex_err = world.sum(diag_ex_err) diag_dc_err = world.sum(diag_dc_err) ## diag_mr3_err = world.sum(diag_mr3_err) ## general_diag_ex_err = world.sum(general_diag_ex_err) general_diag_dc_err = world.sum(general_diag_dc_err) ## general_diag_mr3_err = world.sum(general_diag_mr3_err) inverse_chol_err = world.sum(inverse_chol_err) inverse_err = world.sum(inverse_err) ## assert diag_ex_err < tol assert diag_dc_err < tol ## assert diag_mr3_err < tol ## assert general_diag_ex_err < tol assert general_diag_dc_err < tol ## assert general_diag_mr3_err < tol assert inverse_chol_err < tol if dtype == complex: assert inverse_err < tol