def set_params(self, values):
    self.lengthscales = values[:-1]
    self.variance = values[-1]
    L = np.zeros((self.num_dim, self.num_dim))
    L[np.tril_indices_from(L)] = self.lengthscales
    self.L_inv = inv(L)
    self.projection = np.dot(self.L_inv.T, self.L_inv)
def test_map_diag_and_offdiag(self):
    vars = ["x", "y", "z"]
    g = ag.PairGrid(self.df)
    g.map_offdiag(plt.scatter)
    g.map_diag(plt.hist)

    for ax in g.diag_axes:
        nt.assert_equal(len(ax.patches), 10)

    for i, j in zip(*np.triu_indices_from(g.axes, 1)):
        ax = g.axes[i, j]
        x_in = self.df[vars[j]]
        y_in = self.df[vars[i]]
        x_out, y_out = ax.collections[0].get_offsets().T
        npt.assert_array_equal(x_in, x_out)
        npt.assert_array_equal(y_in, y_out)

    for i, j in zip(*np.tril_indices_from(g.axes, -1)):
        ax = g.axes[i, j]
        x_in = self.df[vars[j]]
        y_in = self.df[vars[i]]
        x_out, y_out = ax.collections[0].get_offsets().T
        npt.assert_array_equal(x_in, x_out)
        npt.assert_array_equal(y_in, y_out)

    for i, j in zip(*np.diag_indices_from(g.axes)):
        ax = g.axes[i, j]
        nt.assert_equal(len(ax.collections), 0)
def transform_covars_grad(self, internal_grad):
    grad = np.empty((self.num_latent, self.get_covar_size()), dtype=np.float32)
    for j in range(self.num_latent):
        tmp = self._theano_transform_covars_grad(internal_grad[0, j],
                                                 self.covars_cholesky[j])
        tmp[np.diag_indices_from(tmp)] *= self.covars_cholesky[j][np.diag_indices_from(tmp)]
        grad[j] = tmp[np.tril_indices_from(self.covars_cholesky[j])]
    return grad.flatten()
def _get_raw_covars(self):
    flattened_covars = np.empty([self.num_latent, self.get_covar_size()],
                                dtype=np.float32)
    for i in xrange(self.num_latent):
        raw_covars = self.covars_cholesky[i].copy()
        raw_covars[np.diag_indices_from(raw_covars)] = np.log(
            raw_covars[np.diag_indices_from(raw_covars)])
        flattened_covars[i] = raw_covars[np.tril_indices_from(raw_covars)]
    return flattened_covars.flatten()
def test_pairplot(self):
    vars = ["x", "y", "z"]
    g = pairplot(self.df)

    for ax in g.diag_axes:
        nt.assert_equal(len(ax.patches), 10)

    for i, j in zip(*np.triu_indices_from(g.axes, 1)):
        ax = g.axes[i, j]
        x_in = self.df[vars[j]]
        y_in = self.df[vars[i]]
        x_out, y_out = ax.collections[0].get_offsets().T
        npt.assert_array_equal(x_in, x_out)
        npt.assert_array_equal(y_in, y_out)

    for i, j in zip(*np.tril_indices_from(g.axes, -1)):
        ax = g.axes[i, j]
        x_in = self.df[vars[j]]
        y_in = self.df[vars[i]]
        x_out, y_out = ax.collections[0].get_offsets().T
        npt.assert_array_equal(x_in, x_out)
        npt.assert_array_equal(y_in, y_out)

    for i, j in zip(*np.diag_indices_from(g.axes)):
        ax = g.axes[i, j]
        nt.assert_equal(len(ax.collections), 0)

    plt.close("all")
def map_lower(self, func, **kwargs):
    """Plot with a bivariate function on the lower diagonal subplots.

    Parameters
    ----------
    func : callable plotting function
        Must take x, y arrays as positional arguments and draw onto the
        "currently active" matplotlib Axes.

    """
    kw_color = kwargs.pop("color", None)
    for i, j in zip(*np.tril_indices_from(self.axes, -1)):
        hue_grouped = self.data.groupby(self.hue_vals)
        for k, (label_k, data_k) in enumerate(hue_grouped):
            ax = self.axes[i, j]
            plt.sca(ax)
            x_var = self.x_vars[j]
            y_var = self.y_vars[i]
            color = self.palette[k] if kw_color is None else kw_color
            func(data_k[x_var], data_k[y_var], label=label_k,
                 color=color, **kwargs)
        self._clean_axis(ax)
        self._update_legend_data(ax)
    if kw_color is not None:
        kwargs["color"] = kw_color
    self._add_axis_labels()
def net_sample_multinomial(A, minEdges, edgesPerSample=1, *args, **kwargs):
    """
    NETWORK SAMPLING ALGORITHM:
    Sample network ties from a multinomial distribution defined as
    1/AAT[i,j], normalized by sum(AAT[i>j]).

    Problems: it doesn't sufficiently cluster the resulting network, and it
    doesn't return an exact number of ties, only at least as many as the
    specified minEdges.
    """
    draws = int(np.ceil(minEdges * 1.2))
    # pairwise distances between observations
    dist = pdist(A)  # what matrix to use: pdist(A) or just tril(AAT) directly?
    invdist = dist
    invdist[invdist != 0] = 1 / invdist[invdist != 0]  # prevent division by 0
    thetavec = invdist / np.sum(invdist)
    theta = squareform(thetavec)

    # multinomial sample
    n = np.shape(theta)[0]
    Z = np.zeros((n, n))
    # samp = sampleLinks(q=thetavec, edgesToDraw=1, draws=draws)
    y = np.random.multinomial(edgesPerSample, thetavec, draws)
    samp = np.asarray([np.mean([y[draw][item] for draw in np.arange(draws)])
                       for item in np.arange(len(thetavec))])
    samp = np.ceil(samp)

    # repeat until reaching enough network ties
    while np.sum(samp) < minEdges:
        draws = int(np.ceil(draws * 1.1))  # increase number of draws and try again
        # samp = sampleLinks(q=thetavec, edgesToDraw=1, draws=draws)
        y = np.random.multinomial(edgesPerSample, thetavec, draws)
        samp = np.asarray([np.mean([y[draw][item] for draw in np.arange(draws)])
                           for item in np.arange(len(thetavec))])
        samp = np.ceil(samp)

    Z[np.tril_indices_from(Z, k=-1)] = samp
    return (theta, Z)
def test_pairplot_reg(self):
    vars = ["x", "y", "z"]
    g = ag.pairplot(self.df, diag_kind="hist", kind="reg")

    for ax in g.diag_axes:
        nt.assert_equal(len(ax.patches), 10)

    for i, j in zip(*np.triu_indices_from(g.axes, 1)):
        ax = g.axes[i, j]
        x_in = self.df[vars[j]]
        y_in = self.df[vars[i]]
        x_out, y_out = ax.collections[0].get_offsets().T
        npt.assert_array_equal(x_in, x_out)
        npt.assert_array_equal(y_in, y_out)

        nt.assert_equal(len(ax.lines), 1)
        nt.assert_equal(len(ax.collections), 2)

    for i, j in zip(*np.tril_indices_from(g.axes, -1)):
        ax = g.axes[i, j]
        x_in = self.df[vars[j]]
        y_in = self.df[vars[i]]
        x_out, y_out = ax.collections[0].get_offsets().T
        npt.assert_array_equal(x_in, x_out)
        npt.assert_array_equal(y_in, y_out)

        nt.assert_equal(len(ax.lines), 1)
        nt.assert_equal(len(ax.collections), 2)

    for i, j in zip(*np.diag_indices_from(g.axes)):
        ax = g.axes[i, j]
        nt.assert_equal(len(ax.collections), 0)
def test_pairplot(self):
    vars = ["x", "y", "z"]
    g = ag.pairplot(self.df)

    for ax in g.diag_axes:
        assert len(ax.patches) > 1

    for i, j in zip(*np.triu_indices_from(g.axes, 1)):
        ax = g.axes[i, j]
        x_in = self.df[vars[j]]
        y_in = self.df[vars[i]]
        x_out, y_out = ax.collections[0].get_offsets().T
        npt.assert_array_equal(x_in, x_out)
        npt.assert_array_equal(y_in, y_out)

    for i, j in zip(*np.tril_indices_from(g.axes, -1)):
        ax = g.axes[i, j]
        x_in = self.df[vars[j]]
        y_in = self.df[vars[i]]
        x_out, y_out = ax.collections[0].get_offsets().T
        npt.assert_array_equal(x_in, x_out)
        npt.assert_array_equal(y_in, y_out)

    for i, j in zip(*np.diag_indices_from(g.axes)):
        ax = g.axes[i, j]
        nt.assert_equal(len(ax.collections), 0)

    g = ag.pairplot(self.df, hue="a")
    n = len(self.df.a.unique())

    for ax in g.diag_axes:
        assert len(ax.lines) == n
        assert len(ax.collections) == n
def __init__(self, lengthscale_mat, variance=1.0):
    lengthscale_mat = np.asarray(lengthscale_mat)
    assert lengthscale_mat.shape[0] == lengthscale_mat.shape[1]
    self.num_dim = lengthscale_mat.shape[0]
    self.params = np.concatenate((
        lengthscale_mat[np.tril_indices_from(lengthscale_mat)],
        np.array([variance])))
def set_covars(self, raw_covars):
    raw_covars = raw_covars.reshape([self.num_latent, self.get_covar_size()])
    for j in xrange(self.num_latent):
        cholesky = np.zeros([self.num_dim, self.num_dim], dtype=np.float32)
        cholesky[np.tril_indices_from(cholesky)] = raw_covars[j]
        cholesky[np.diag_indices_from(cholesky)] = np.exp(
            cholesky[np.diag_indices_from(cholesky)])
        self.covars_cholesky[j] = cholesky
        self.covars[j] = mdot(self.covars_cholesky[j], self.covars_cholesky[j].T)
def find_smallest_index(matrice):
    """Return the (i, j) index of the smallest entry in a matrix.

    A length-2 array (i, j) is returned. Warning: only the strict lower
    triangle is searched, so a diagonal entry is never chosen even if it
    holds the smallest value.
    """
    index = np.tril_indices_from(matrice, -1)
    return np.vstack(index)[:, matrice[index].argmin()]
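# A minimal usage sketch for find_smallest_index (the matrix below is purely
# illustrative). Only the strict lower triangle is searched, so the smallest
# off-diagonal entry D[1, 0] = 2.0 wins and the diagonal 9s are ignored.
import numpy as np

D = np.array([[9.0, 5.0, 7.0],
              [2.0, 9.0, 1.0],
              [4.0, 3.0, 9.0]])
i, j = find_smallest_index(D)  # -> (1, 0)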
def shepard(self, xax=1, yax=2):
    coords = self.U[:, [xax - 1, yax - 1]]
    reducedD = np.zeros((coords.shape[0], coords.shape[0]))
    for i in xrange(coords.shape[0]):
        for j in xrange(coords.shape[0]):
            d = coords[i, :] - coords[j, :]
            reducedD[i, j] = np.sqrt(d.dot(d))
    reducedD = reducedD[np.tril_indices_from(reducedD, k=-1)]
    originalD = self.y2[np.tril_indices_from(self.y2, k=-1)]
    xmin = np.min(reducedD)
    xmax = np.max(reducedD)
    f, ax = py.subplots()
    ax.plot(reducedD, originalD, 'ko')
    ax.plot([xmin, xmax], [xmin, xmax], 'r--')
    ax.set_xlabel('Distances in Reduced Space')
    ax.set_ylabel('Distances in Original Matrix')
    py.show()
def _band_infinite():
    '''Suppress the +- width band around the diagonal of a distance matrix'''
    band = np.empty((t, t))
    band[:] = np.inf
    band[np.triu_indices_from(band, width)] = 0
    band[np.tril_indices_from(band, -width)] = 0
    return band
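# Sketch of the mask _band_infinite builds, with the closure variables t
# (matrix size) and width (band half-width) bound explicitly for the example.
# Entries within `width` of the diagonal stay np.inf; adding this mask to a
# self-distance matrix suppresses trivial near-diagonal matches.
import numpy as np

t, width = 5, 2
band = np.full((t, t), np.inf)
band[np.triu_indices_from(band, width)] = 0
band[np.tril_indices_from(band, -width)] = 0
# band[i, j] is inf exactly where |i - j| < width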
def from_vector(x):
    # Solve len(x) = n * (n + 1) / 2 for n
    n = int((math.sqrt(len(x) * 8 + 1) - 1) / 2)
    result = np.zeros((n, n))
    result[np.tril_indices_from(result, -1)] = x[n:]
    result += result.transpose()
    result[np.diag_indices_from(result)] = x[:n]
    return result
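# Hedged round-trip check for from_vector: the packing assumed above puts the
# n diagonal entries first (x[:n]) and the strict lower triangle after them
# (x[n:]), so packing a symmetric matrix that way should invert exactly.
import numpy as np

M = np.array([[1.0, 2.0, 3.0],
              [2.0, 4.0, 5.0],
              [3.0, 5.0, 6.0]])
x = np.concatenate([np.diag(M), M[np.tril_indices_from(M, -1)]])
assert np.allclose(from_vector(x), M)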
def sort_links_by_weight(corr_mat, ok_nodes, include_mst):
    """
    Sort the links by their link weight.

    Parameters
    ----------
    corr_mat : np.array
        2D numpy array with bad nodes.
    ok_nodes : np.array
        the bool blacklist (whitelist)
    include_mst : bool
        If True, add the maximum spanning tree to the beginning of the
        sorted list.

    Returns
    -------
    edgelist : numpy structured array
        (node1, node2, weight)
        array([(0, 1, 1.0), (0, 3, 0.5), (2, 3, 0.5), (0, 4, 0.7),
               (1, 4, 0.4)],
              dtype=[('node1', '<i4'), ('node2', '<i4'), ('weight', '<f8')])
    """
    up_diag_matrix = _get_filtered_triu_adj_mat_copy(corr_mat, ok_nodes)
    n = len(up_diag_matrix)
    minVal = np.min(up_diag_matrix)
    minValMinusOne = np.min(up_diag_matrix) - 1
    # So that possible overflows don't go unnoticed
    assert minValMinusOne < minVal

    initEdges = np.array(np.triu_indices_from(up_diag_matrix, 1)).T
    weights = up_diag_matrix[np.triu_indices_from(up_diag_matrix, 1)]
    nLinksMax = (n * (n - 1)) // 2
    nLinksMST = 0
    edgelist = np.zeros(nLinksMax, dtype=[('node1', 'i4'),
                                          ('node2', 'i4'),
                                          ('weight', 'f8')])

    # Get the maximum spanning tree (multiplying the weights by -1 does the
    # trick)
    if include_mst:
        g = igraph.Graph(n, list(initEdges), directed=False)
        mst = g.spanning_tree(-1 * weights, return_tree=False)
        for i, ei in enumerate(mst):
            edge = g.es[ei]
            edgelist[i] = edge.source, edge.target, weights[ei]
            # Take these links away from the orig. mat
            up_diag_matrix[edge.source, edge.target] = minValMinusOne
        nLinksMST = len(mst)

    # How many links we still need to take after the (possible) MST:
    nLinksYetToTake = np.max([nLinksMax - nLinksMST, 0])  # mst already there
    # Get the next largest indices
    up_diag_matrix[np.tril_indices_from(up_diag_matrix, 0)] = minValMinusOne
    mflat = up_diag_matrix.flatten()
    flatindices = mflat.argsort()[::-1][:nLinksYetToTake]
    edgelist[nLinksMST:]['node1'], edgelist[nLinksMST:]['node2'] = \
        np.unravel_index(flatindices, (n, n))
    edgelist[nLinksMST:]['weight'] = mflat[flatindices]
    return edgelist
def score_samples(self, graph, clip=None):
    """
    Compute the weighted log probabilities for each potential edge.

    Note that this implicitly assumes the input graph is indexed like the
    fit model.

    Parameters
    ----------
    graph : np.ndarray
        Input graph. Must be same shape as model's :attr:`p_mat_` attribute.

    clip : scalar or None, optional (default=None)
        Values for which to clip probability matrix, entries less than c or
        more than 1 - c are set to c or 1 - c, respectively. If None, values
        will not be clipped in the likelihood calculation, which may result
        in poorly behaved likelihoods depending on the model.

    Returns
    -------
    sample_scores : np.ndarray (size of ``graph``)
        log-likelihood per potential edge in the graph
    """
    check_is_fitted(self, "p_mat_")
    # P.ravel() <dot> graph * (1 - P.ravel()) <dot> (1 - graph)
    graph = import_graph(graph)
    if not is_unweighted(graph):
        raise ValueError("Model only implemented for unweighted graphs")
    p_mat = self.p_mat_.copy()

    if np.shape(p_mat) != np.shape(graph):
        raise ValueError("Input graph size must be the same size as P matrix")

    inds = None
    if not self.directed and self.loops:
        inds = np.triu_indices_from(graph)  # ignore lower half of graph, symmetric
    elif not self.directed and not self.loops:
        inds = np.triu_indices_from(graph, k=1)  # ignore the diagonal
    elif self.directed and not self.loops:
        xu, yu = np.triu_indices_from(graph, k=1)
        xl, yl = np.tril_indices_from(graph, k=-1)
        x = np.concatenate((xl, xu))
        y = np.concatenate((yl, yu))
        inds = (x, y)
    if inds is not None:
        p_mat = p_mat[inds]
        graph = graph[inds]

    # clip the probabilities that are degenerate
    if clip is not None:
        p_mat[p_mat < clip] = clip
        p_mat[p_mat > 1 - clip] = 1 - clip

    # TODO: use nonzero inds here will be faster
    successes = np.multiply(p_mat, graph)
    failures = np.multiply((1 - p_mat), (1 - graph))
    likelihood = successes + failures
    return np.log(likelihood)
def plot_cor_heatmap(cor, value_range=[-1, 1], title=None, cmap='jet',
                     figsize=None, full=True):
    """
    TODO: This function runs too long for large arrays. Implement with
    regular matplotlib (??).
    https://matplotlib.org/gallery/images_contours_and_fields/image_annotated_heatmap.html
    """
    if len(value_range) == 2:
        vmin, vmax = value_range
    else:
        vmin, vmax = cor.min().min(), cor.max().max()

    fontsize = 8
    if figsize is None:
        sc_x, sc_y = 0.5, 0.5
        figsize = sc_x * cor.shape[1], sc_y * cor.shape[0]

    fig, ax = plt.subplots(figsize=figsize)
    if full:
        ax = sns.heatmap(cor, vmin=vmin, vmax=vmax, cmap=cmap, annot=True,
                         annot_kws={"size": fontsize}, fmt='.2f',
                         linewidths=0.99, linecolor='white')
    else:
        mask = np.zeros_like(cor)
        # mask[np.triu_indices_from(mask)] = True
        mask[np.tril_indices_from(mask)] = True
        ax = sns.heatmap(cor, vmin=vmin, vmax=vmax, cmap=cmap, annot=True,
                         annot_kws={"size": fontsize}, fmt='.2f',
                         linewidths=0.99, linecolor='white', mask=mask)

    # ax.invert_yaxis()
    ax.xaxis.tick_top()
    if isinstance(cor, pd.DataFrame):
        ax.set_xticklabels(cor.columns, rotation=60)
        # plt.xticks(range(len(cor.columns)), cor.columns)
        # plt.yticks(range(len(cor.columns)), cor.columns)
    if title:
        plt.title(title)
    return fig
def test_tril_indices_from_kover(self):
    a = np.zeros((3, 3))
    ref1, ref2 = np.tril_indices_from(a, k=1)
    tref1, tref2 = tril_indices_from(a, k=1)
    with self.test_session():
        self.assertTrue(np.all(ref1 == tref1.eval()))
        self.assertTrue(np.all(ref2 == tref2.eval()))
def set_covars(self, raw_covars):
    raw_covars = raw_covars.reshape([self.num_latent, self.get_covar_size()])
    for j in range(self.num_latent):
        cholesky = np.zeros([self.num_dim, self.num_dim], dtype=util.PRECISION)
        cholesky[np.tril_indices_from(cholesky)] = raw_covars[j]
        cholesky[np.diag_indices_from(cholesky)] = np.exp(
            cholesky[np.diag_indices_from(cholesky)])
        self.covars_cholesky[j] = cholesky
        self.covars[j] = mdot(self.covars_cholesky[j], self.covars_cholesky[j].T)
def createLowerTriangularMatrixOfPairs(self):
    """
    Create triangular matrix index pairs for the similarity measure.
    """
    matrix = np.zeros((self.__num_docs, self.__num_docs))
    indices = np.tril_indices_from(matrix)
    n_rows = indices[0].shape[0]
    pairs = [(indices[0][i], indices[1][i]) for i in range(n_rows)
             if not indices[0][i] == indices[1][i]]
    return pairs
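# The same pair list can be built without allocating the intermediate matrix:
# a sketch assuming num_docs documents, using np.tril_indices with k=-1 so the
# diagonal never has to be filtered out.
import numpy as np

num_docs = 4
rows, cols = np.tril_indices(num_docs, k=-1)
pairs = list(zip(rows.tolist(), cols.tolist()))
# [(1, 0), (2, 0), (2, 1), (3, 0), (3, 1), (3, 2)]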
def grad_logprior(self, prior, grad, parameters, **kwargs):
    scale_Qinv = prior.hyperparams[self._scale_name]
    df_Qinv = prior.hyperparams[self._df_name]
    LQinv = getattr(parameters, self._lt_prec_name)
    grad_LQinv = \
        (df_Qinv - LQinv.shape[0] - 1) * np.linalg.inv(LQinv.T) - \
        np.linalg.solve(scale_Qinv, LQinv)
    grad[self._lt_vec_name] = grad_LQinv[np.tril_indices_from(grad_LQinv)]
    return
def grad_trace_a_inv_dot_covars(self, chol_a, component_index, latent_index):
    assert component_index == 0
    # TODO(karl): There is a bug here related to double counting.
    tmp = 2.0 * scipy.linalg.cho_solve(
        (chol_a, True), self.covars_cholesky[latent_index])
    tmp[np.diag_indices_from(tmp)] *= (
        self.covars_cholesky[latent_index][np.diag_indices_from(tmp)])
    return tmp[np.tril_indices_from(self.covars_cholesky[latent_index])]
def WishartBartlett(name, S, nu, is_cholesky=False, return_cholesky=False):
    """
    Bartlett decomposition of the Wishart distribution. As the Wishart
    distribution requires the matrix to be symmetric positive semi-definite,
    it is impossible for MCMC to ever propose acceptable matrices. Instead,
    we can use the Bartlett decomposition, which samples a lower-triangular
    matrix. Specifically:

    If L ~ [[sqrt(c_1), 0, ...],
            [z_21, sqrt(c_2), 0, ...],
            [z_31, z_32, sqrt(c_3), ...]]
    with c_i ~ Chi²(nu - i + 1) and z_ij ~ N(0, 1), then
    L * A * A.T * L.T ~ Wishart(L * L.T, nu).

    See http://en.wikipedia.org/wiki/Wishart_distribution#Bartlett_decomposition
    for more information.

    :Parameters:
      S : ndarray
        p x p positive definite matrix
        Or:
        p x p lower-triangular matrix that is the Cholesky factor
        of the covariance matrix.
      nu : int
        Degrees of freedom, > dim(S).
      is_cholesky : bool (default=False)
        Input matrix S is already Cholesky decomposed as S.T * S
      return_cholesky : bool (default=False)
        Only return the Cholesky decomposed matrix.

    :Note:
      This is not a standard Distribution class but follows a similar
      interface. Besides the Wishart distribution, it will add RVs c and z
      to your model which make up the matrix.
    """
    L = S if is_cholesky else scipy.linalg.cholesky(S)
    diag_idx = np.diag_indices_from(S)
    tril_idx = np.tril_indices_from(S, k=-1)
    n_diag = len(diag_idx[0])
    n_tril = len(tril_idx[0])
    c = tt.sqrt(ChiSquared('c', nu - np.arange(2, 2 + n_diag), shape=n_diag))
    print('Added new variable c to model diagonal of Wishart.')
    z = Normal('z', 0, 1, shape=n_tril)
    print('Added new variable z to model off-diagonals of Wishart.')

    # Construct A matrix
    A = tt.zeros(S.shape, dtype=np.float32)
    A = tt.set_subtensor(A[diag_idx], c)
    A = tt.set_subtensor(A[tril_idx], z)

    # L * A * A.T * L.T ~ Wishart(L * L.T, nu)
    if return_cholesky:
        return Deterministic(name, tt.dot(L, A))
    else:
        return Deterministic(name, tt.dot(tt.dot(tt.dot(L, A), A.T), L.T))
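# A plain-numpy sketch of the Bartlett construction above (an illustration,
# not the PyMC implementation): draw one W = L A A^T L^T ~ Wishart(S, nu)
# using the textbook degrees of freedom c_i ~ Chi²(nu - i + 1).
import numpy as np
import scipy.linalg

rng = np.random.default_rng(0)
S = np.array([[2.0, 0.3],
              [0.3, 1.0]])
nu = 5
p = S.shape[0]
L = scipy.linalg.cholesky(S, lower=True)
A = np.zeros((p, p))
A[np.diag_indices_from(A)] = np.sqrt(rng.chisquare(nu - np.arange(p)))
A[np.tril_indices_from(A, k=-1)] = rng.standard_normal(p * (p - 1) // 2)
W = L @ A @ A.T @ L.T  # one Wishart(S, nu) sample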
def heat_map(self, Dplot_specs):
    data = Dplot_specs["data"]
    heat = np.array(data)
    heat[np.tril_indices_from(heat)] = False
    fig, ax = plt.subplots()
    fig.set_size_inches(Dplot_specs["figsize"])
    sns.set(font_scale=1.0)
    self.Dheatmap_plot = sns.heatmap(data, mask=heat, vmax=1.0, vmin=0.0,
                                     square=True, annot=True, cmap='Reds')
    # return a cookie reference to heatMapplot
    return self.updatePlotCounter("heatMap", Dplot_specs["title"])
def plot_pairwise_scatter(self, i, threshold=0.95):
    '''Plot a pairwise scatter plot of data points, with contours as
    background.

    Parameters
    ----------
    i : int
    threshold : float

    Returns
    -------
    Figure instance

    The lower triangle background is a binary contour based on the
    specified threshold. All axes not shown are set to a default value in
    the middle of their range.

    The upper triangle shows a contour map with the conditional
    probability, again setting all non-shown dimensions to a default value
    in the middle of their range.
    '''
    model = self.models[i]

    columns = model.params.index.values.tolist()
    columns.remove('Intercept')
    x = self._normalized[columns]
    data = x.copy()

    # TODO:: have option to change
    # diag to CDF, gives you effectively the
    # regional sensitivity analysis results
    data['y'] = self.y  # for testing
    grid = sns.PairGrid(data=data, hue='y', vars=columns)
    grid.map_lower(plt.scatter, s=5)
    grid.map_diag(sns.kdeplot, shade=True)
    grid.add_legend()

    contour_levels = np.arange(0, 1.05, 0.05)
    for i, j in zip(*np.triu_indices_from(grid.axes, 1)):
        ax = grid.axes[i, j]
        ylabel = columns[i]
        xlabel = columns[j]
        contours(ax, model, xlabel, ylabel, contour_levels)

    levels = [0, threshold, 1]
    for i, j in zip(*np.tril_indices_from(grid.axes, -1)):
        ax = grid.axes[i, j]
        ylabel = columns[i]
        xlabel = columns[j]
        contours(ax, model, xlabel, ylabel, levels)

    fig = plt.gcf()
    return fig
def heat_map():
    correlation_map = df[df.columns].corr()
    obj = np.array(correlation_map)
    obj[np.tril_indices_from(obj)] = False
    fig, ax = plt.subplots()
    fig.set_size_inches(15, 10)
    sns.heatmap(correlation_map, mask=obj, vmax=.7, square=True, annot=True)
    fig.savefig("1.png")
    return send_file("1.png")
def setUp(self):
    # N2O inventory of agricultural source categories for 2012.
    self.n2o_inv = [
        958.4, 1092.7, 497.8, 42.2, 7.3, 135.9, 1.7, 0.8, 51.1, 9539.9,
        4693.5, 472.6, 6171.3, 4750.5, 1315, 2213.5, 11596.4, 162.8
    ]
    # Corresponding list of source category descriptions.
    self.n2o_index = [
        "Manure management, dairy cows",
        "Manure management, other cattle",
        "Manure management, pigs",
        "Manure management, sheep",
        "Manure management, goats",
        "Manure management, horses",
        "Manure management, mules, asses",
        "Manure management, buffalo",
        "Manure management, poultry",
        "Soils, mineral fertilizers",
        "Soils, application of manure",
        "Soils, N fixing crops",
        "Soils, crop residues",
        "Soils, organic soils",
        "Soils, grazing",
        "Soils, indirect emissions (deposition)",
        "Soils, indirect emissions (leaching, run-off)",
        "Soils, sewage sludge emissions"
    ]
    # Uncertainty of inventory in %, half the 95 % confidence interval.
    self.n2o_percent = [
        100.1, 100.1, 100.1, 300.2, 300.7, 300.2, 316.2, 100.5, 100.5, 80,
        100, 94.3, 94.3, 200, 201, 111.8, 416.3, 82.5
    ]
    # Convert to absolute values in Gg.
    self.n2o_uncert = [
        a * b / 100 for a, b in zip(self.n2o_inv, self.n2o_percent)
    ]
    self.n2o_inv_uncert = np.sqrt(np.sum(np.square(self.n2o_uncert)))
    # Hypothetical covariance matrix for N2O emissions.
    n = len(self.n2o_percent)
    self.n2o_covmat = np.zeros(shape=(n, n))
    np.fill_diagonal(self.n2o_covmat, np.square(self.n2o_uncert))
    indu = np.triu_indices_from(self.n2o_covmat, 1)
    indl = np.tril_indices_from(self.n2o_covmat, -1)
    # Calculate covariances for an assumed correlation coefficient of 0.5.
    self.n2o_covmat[indu] = 0.5 * np.sqrt(
        self.n2o_covmat[(indu[0], indu[0])] *
        self.n2o_covmat[(indu[1], indu[1])])
    self.n2o_covmat[indl] = 0.5 * np.sqrt(
        self.n2o_covmat[(indl[0], indl[0])] *
        self.n2o_covmat[(indl[1], indl[1])])
    self.n2ocovsum = np.sqrt(self.n2o_covmat.sum())
    self.n2odiagsum = np.sqrt(np.sum(np.diag(self.n2o_covmat)))
    # Setup test raster file names and location.
    self.invin = os.path.join(os.path.dirname(__file__),
                              "data/model_peat_examp_1.tiff")
    self.uncertin = os.path.join(os.path.dirname(__file__),
                                 "data/uncert_peat_examp_1.tiff")
    # Setup test vector file names and location.
    self.invvector = os.path.join(os.path.dirname(__file__),
                                  "data/n2o_eu_2010_inventory/"
                                  "n2o_eu_2010_inventory.shp")
def net_sample_multinomial(A, minEdges, edgesPerSample=1, *args, **kwargs):
    """
    NETWORK SAMPLING ALGORITHM:
    Sample network ties from a multinomial distribution defined as
    1/AAT[i,j], normalized by sum(AAT[i>j]).

    PROBLEM: it doesn't sufficiently cluster the resulting network, and it
    doesn't return an exact number of ties, only at least as many as the
    specified minEdges.

    Parameters
    ----------
    A : ndarray
        matrix of eigenvectors from RESCAL_ALS tensor decomposition with
        negative values replaced by zeros
    minEdges : int
        number of edges (social ties) to be assigned in the network

    Returns
    -------
    tuple
        tie probabilities : ndarray
            pairwise distances normalized by largest distance
        sampled network : ndarray
            binary matrix of assigned ties above cutoff yielding at least
            minEdges
    """
    draws = int(np.ceil(minEdges * 1.2))
    # pairwise distances between observations
    dist = pdist(A)  # what matrix to use: pdist(A) or just tril(AAT) directly?
    invdist = dist
    invdist[invdist != 0] = 1 / invdist[invdist != 0]  # prevent division by 0
    thetavec = invdist / np.sum(invdist)
    theta = squareform(thetavec)

    # multinomial sample
    n = np.shape(theta)[0]
    Z = np.zeros((n, n))
    # samp = sampleLinks(q=thetavec, edgesToDraw=1, draws=draws)
    y = np.random.multinomial(edgesPerSample, thetavec, draws)
    samp = np.asarray([
        np.mean([y[draw][item] for draw in range(draws)])
        for item in range(len(thetavec))
    ])
    samp = np.ceil(samp)

    # repeat until reaching enough network ties
    while np.sum(samp) < minEdges:
        draws = int(np.ceil(draws * 1.1))  # increase number of draws and try again
        # samp = sampleLinks(q=thetavec, edgesToDraw=1, draws=draws)
        y = np.random.multinomial(edgesPerSample, thetavec, draws)
        samp = np.asarray([
            np.mean([y[draw][item] for draw in range(draws)])
            for item in range(len(thetavec))
        ])
        samp = np.ceil(samp)

    Z[np.tril_indices_from(Z, k=-1)] = samp
    return (theta, Z)
def gen_k_factor2(nobs=10000, k=2, idiosyncratic_ar1=False,
                  idiosyncratic_var=0.4, k_ar=6):
    # Simulate bivariate VAR(6) for the factor
    ix = pd.period_range(start='1950-01', periods=1, freq='M')
    faux = pd.DataFrame([[0, 0]], index=ix, columns=['f1', 'f2'])
    mod = varmax.VARMAX(faux, order=(k_ar, 0), trend='n')

    A = np.zeros((2, 2 * k_ar))
    A[:, -2:] = np.array([[0.5, -0.2], [0.1, 0.3]])
    Q = np.array([[1.5, 0.2], [0.2, 0.5]])
    L = np.linalg.cholesky(Q)
    params = np.r_[A.ravel(), L[np.tril_indices_from(L)]]

    # Simulate the factors
    factors = mod.simulate(params, nobs)

    # Add in the idiosyncratic part
    faux = pd.Series([0], index=ix)
    mod_idio = sarimax.SARIMAX(faux, order=(1, 0, 0))
    phi = [0.7, -0.2] if idiosyncratic_ar1 else [0, 0.]
    tmp = factors.iloc[:, 0] + factors.iloc[:, 1]

    # Monthly variables
    endog_M = pd.concat([tmp.copy() for i in range(k)], axis=1)
    columns = []
    for i in range(k):
        endog_M.iloc[:, i] = (
            endog_M.iloc[:, i] +
            mod_idio.simulate([phi[0], idiosyncratic_var], nobs))
        columns += [f'yM{i + 1}_f2']
    endog_M.columns = columns

    # Monthly versions of quarterly variables
    endog_Q_M = pd.concat([tmp.copy() for i in range(k)], axis=1)
    columns = []
    for i in range(k):
        endog_Q_M.iloc[:, i] = (
            endog_Q_M.iloc[:, i] +
            mod_idio.simulate([phi[0], idiosyncratic_var], nobs))
        columns += [f'yQ{i + 1}_f2']
    endog_Q_M.columns = columns

    # Create quarterly versions of quarterly variables
    levels_M = 1 + endog_Q_M / 100
    levels_M.iloc[0] = 100
    levels_M = levels_M.cumprod()
    # log_levels_M = np.log(levels_M) * 100
    log_levels_Q = (
        np.log(levels_M).resample('Q', convention='e').sum().iloc[:-1] * 100)

    # Compute the quarterly growth rate series
    endog_Q = log_levels_Q.diff()

    return endog_M, endog_Q, factors
def variance():
    data = pd.read_csv("data.csv")
    df = data.pivot("Ref Tree", "Simulated Tree", "variance")
    fig, ax = plt.subplots()
    # Hide the redundant lower triangle of the symmetric table
    mask = np.zeros_like(df)
    mask[np.tril_indices_from(mask)] = True
    sns.heatmap(df, mask=mask, annot=True, fmt=".3f", cmap="YlGnBu")
    fig.savefig("variance.png", dpi=300)
def full_corrs(data):
    """Same- and cross-team correlations.

    Same-team correlations are above the diagonal; cross-team correlations
    are on and below the diagonal.
    """
    corr = same_team_corrs(data)
    tril_ixs = np.tril_indices_from(corr)
    corr.values[tril_ixs] = cross_team_corrs(data).values[tril_ixs]
    return corr
def _update(self):
    self.parameters = self.get_parameters()
    for k in range(self.num_comp):
        for j in range(self.num_process):
            temp = np.zeros((self.num_dim, self.num_dim))
            temp[np.tril_indices_from(temp)] = self.L_flatten[k, j, :].copy()
            temp[np.diag_indices_from(temp)] = np.exp(temp[np.diag_indices_from(temp)])
            # temp[np.diag_indices_from(temp)] = temp[np.diag_indices_from(temp)] ** 2
            self.L[k, j, :, :] = temp
            self.s[k, j] = mdot(self.L[k, j, :, :], self.L[k, j, :, :].T)
def get_lower_tri(x, with_diagonal=False):
    """
    Returns the lower triangle of a provided matrix.

    Inputs
        x (np.ndarray): 2D matrix to get triangle from
        with_diagonal (bool): if True, keeps the diagonal as part of the
            lower triangle
    """
    k = 0 if with_diagonal else -1
    return x[np.tril_indices_from(x, k=k)]
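# Quick usage sketch for get_lower_tri on a 3 x 3 example: with the default
# k=-1 only the three strictly-lower entries come back, in row-major order.
import numpy as np

x = np.arange(9.0).reshape(3, 3)
get_lower_tri(x)                      # array([3., 6., 7.])
get_lower_tri(x, with_diagonal=True)  # array([0., 3., 4., 6., 7., 8.])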
def flattened_to_symmetric(x):
    '''Convert a vector containing the elements of a lower triangular
    matrix into a full symmetric matrix.
    '''
    n = triangular_root(len(x))
    new = np.zeros((n, n))
    inds = np.tril_indices_from(new)
    new[inds] = x
    new[(inds[1], inds[0])] = x
    return new
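# Round-trip sketch for flattened_to_symmetric. triangular_root is not shown
# above; it is assumed to invert m = n * (n + 1) / 2, so a minimal version is
# included here for the example to run.
import math
import numpy as np

def triangular_root(m):
    return int((math.sqrt(8 * m + 1) - 1) / 2)

M = np.array([[1.0, 2.0],
              [2.0, 3.0]])
x = M[np.tril_indices_from(M)]  # array([1., 2., 3.])
assert np.allclose(flattened_to_symmetric(x), M)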
def predict(mu, sigma):
    r = batched_kl(mu, sigma, mu, sigma)
    np.fill_diagonal(r, np.inf)
    var_norms = np.linalg.norm(sigma, axis=1)
    sorted_norms = np.argsort(var_norms)
    rs = r[sorted_norms, :][:, sorted_norms]
    rs[np.tril_indices_from(rs)] = np.inf
    p = np.argmin(rs, 1)
    p[rs[np.arange(p.shape[0]), p] == np.inf] = -1
    return p, sorted_norms
def blank_unused_triangle(_map, used_triangle):
    check_upper_lower(used_triangle)
    indices = np.tril_indices_from(_map)
    _out_map = _map.copy()
    _out_map[indices] = np.nan
    if used_triangle == "lower":
        _out_map = _out_map.T
    return _out_map
def write_ltm(matrix, path):
    with open(path, "w") as f:
        x, y = np.tril_indices_from(matrix, -1)
        a = x[0]
        for idx in range(len(x)):
            pair = x[idx], y[idx]
            if a != pair[0]:
                f.write("\n")
                a = pair[0]
            f.write("%f," % matrix[pair])
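# Usage sketch for write_ltm: each row of the strict lower triangle becomes
# one comma-terminated line, so a 3 x 3 matrix yields two lines.
import numpy as np

m = np.array([[0.0, 0.0, 0.0],
              [1.5, 0.0, 0.0],
              [2.5, 3.5, 0.0]])
write_ltm(m, "ltm.txt")
# ltm.txt now contains:
# 1.500000,
# 2.500000,3.500000,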
def get_initial_w_elements(prior_mean, prior_cov, n_out):
    # NOTE: This is a numpy function.
    # Do a Cholesky on the prior
    prior_cov_chol = np.linalg.cholesky(prior_cov)
    # Extract the lower-triangular elements
    elts = np.tril_indices_from(prior_cov_chol)
    return prior_cov_chol[elts]
def watts_and_strogatz(conn, p_conn=[0.1], bin=False):
    # scale conn data
    conn_vec = conn[np.tril_indices_from(conn, -1)]
    data = pd.Series(conn_vec[np.nonzero(conn_vec)])

    # generate data given a distribution
    def get_pdf(data, dist, size):
        # fit dist to data
        params = dist.fit(data)

        # separate parts of parameters
        arg = params[:-2]
        loc = params[-2]
        scale = params[-1]

        # get same start and end points of distribution
        start = dist.ppf(0.01, *arg, loc=loc, scale=scale) if arg else dist.ppf(
            0.01, loc=loc, scale=scale)
        end = dist.ppf(0.99, *arg, loc=loc, scale=scale) if arg else dist.ppf(
            0.99, loc=loc, scale=scale)

        # build PDF and turn into pandas Series
        x = np.linspace(start, end, size)
        pdf = dist.pdf(x, loc=loc, scale=scale, *arg)
        return pdf

    # binarize conn data
    conn_bin = conn.astype(bool).astype(int)
    deg = int(np.mean(np.sum(conn_bin, axis=0)))
    N = len(conn_bin)

    # create networks
    networks = []
    for p in p_conn:
        # create watts_strogatz graph
        G = nx.watts_strogatz_graph(N, deg, p)
        network = nx.to_numpy_array(G)
        if not bin:
            # assign weights to conns
            mask = np.nonzero(network)
            actual_conns = conn[mask]
            new_conns = get_pdf(data, st.powerlognorm, len(mask[0]))
            network[mask] = new_conns[np.argsort(actual_conns)]
        # save weighted network
        networks.append(network)

    return np.dstack(networks)
def merge_layers(self, dest_layer, src1_layer, src2_layer):
    w1 = src1_layer.get_weights()
    w2 = src2_layer.get_weights()
    res = w1.copy()
    if type(w1) is list:
        half = round(len(w1) / 2)
        res[half:-1] = w2[half:-1]
    else:
        l_indices = np.tril_indices_from(w2)
        res[l_indices] = w2[l_indices]
    dest_layer.set_weights(res)
def test_multiple_missing(rg):
    n_sample = 50
    n_cov = 2
    n_pheno = 31
    phenotype_df = pd.DataFrame(random_phenotypes((n_sample, n_pheno), rg))
    Y = phenotype_df.to_numpy()
    Y[np.tril_indices_from(Y, k=-20)] = np.nan
    assert phenotype_df.isna().sum().sum() > 0
    covariate_df = pd.DataFrame(rg.random((n_sample, n_cov)))
    genotype_df = pd.DataFrame(rg.random((n_sample, 1)))
    assert_glow_equals_golden(genotype_df, phenotype_df, covariate_df)
def create_connect(xyz, min, max):
    """Create connectivity dataset."""
    # Create a random connection dataset:
    connect = 100. * np.random.rand(len(xyz), len(xyz))
    # Mask the connection array:
    connect = np.ma.masked_array(connect, False)
    # Hide the lower triangle:
    connect.mask[np.tril_indices_from(connect.mask)] = True
    # Hide connections that are not between min and max:
    connect.mask[np.logical_or(connect.data < min,
                               connect.data > max)] = True
    return connect
def correlationMatrix(self):
    cor_mat = self.dframe[:].corr()
    mask = np.array(cor_mat)
    mask[np.tril_indices_from(mask)] = False
    fig = plt.gcf()
    fig.set_size_inches(30, 12)
    sns.heatmap(data=cor_mat, mask=mask, square=True, annot=True, cbar=True)
def transform_eye_grad(self):
    """
    In the case of a posterior distribution with one component, the
    gradient of the entropy term with respect to the posterior covariance
    is the identity. This function returns the flattened lower-triangular
    terms of the identity matrices for all processes.
    """
    grad = np.empty((self.num_comp, self.num_process, self.get_sjk_size()))
    meye = np.eye(self.num_dim)[np.tril_indices_from(self.L[0, 0])]
    for k in range(self.num_comp):
        for j in range(self.num_process):
            grad[k, j] = meye
    return grad.flatten()
def create_connect(xyz, min, max):
    """Create connectivity dataset."""
    # Create a random connection dataset:
    connect = np.random.uniform(-100., 100., (len(xyz), len(xyz)))
    # Mask the connection array:
    connect = np.ma.masked_array(connect, False)
    # Hide the lower triangle:
    connect.mask[np.tril_indices_from(connect.mask)] = True
    # Hide connections that are not between min and max:
    connect.mask[np.logical_or(connect.data < min,
                               connect.data > max)] = True
    return connect
def update_covariance(self, j, Sj):
    Sj = Sj.copy()
    mm = min(Sj[np.diag_indices_from(Sj)])
    if mm < 0:
        Sj[np.diag_indices_from(Sj)] = Sj[np.diag_indices_from(Sj)] - 1.1 * mm
    for k in range(self.num_comp):
        self.s[k, j] = Sj.copy()
        self.L[k, j] = jitchol(Sj, 10)
        tmp = self.L[k, j].copy()
        tmp[np.diag_indices_from(tmp)] = np.log(tmp[np.diag_indices_from(tmp)])
        self.L_flatten[k, j] = tmp[np.tril_indices_from(tmp)]
    self._update()
def net_sample_deterministic(AATnn, minEdges, *args, **kwargs):
    theta = AATnn / AATnn.max()
    n = np.shape(AATnn)[0]
    # pull singular values from the strict lower triangle
    sv = AATnn[np.tril_indices_from(AATnn, k=-1)]
    cutOff = ncFunctions.top_n_edges(data=sv, minEdges=minEdges, n=n)['cutOff']
    Z = np.zeros((n, n))
    Z[np.where(AATnn >= cutOff)] = 1
    return (theta, Z)
def to_matrix(self):
    vector = self.get_parameter_vector(include_frozen=True)
    if self.metric_type == 0:
        return np.exp(vector) * np.eye(len(self.axes))
    elif self.metric_type == 1:
        return np.diag(np.exp(vector))
    else:
        n = len(self.axes)
        L = np.zeros((n, n))
        L[np.tril_indices_from(L)] = vector
        i = np.diag_indices_from(L)
        L[i] = np.exp(L[i])
        return np.dot(L, L.T)
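# Standalone sketch of the general-metric branch above: the parameter vector
# packs a lower-triangular factor with a log-scaled diagonal, and the dense
# metric is recovered as L L^T (symmetric positive definite by construction).
import numpy as np

n = 3
vector = np.arange(6, dtype=float)  # n * (n + 1) // 2 parameters
L = np.zeros((n, n))
L[np.tril_indices_from(L)] = vector
i = np.diag_indices_from(L)
L[i] = np.exp(L[i])
M = np.dot(L, L.T)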
def doubleMutant(data, refVariant, libSeq, startPos=1,
                 refSignal=None, normToRefSignal=True, coop=False,
                 vmin=None, vmax=None, cmap=None, center=0, cbarLabel=None,
                 triangle=None, invertY=True, linewidth=3, **kwargs):
    """Plot a double mutant heatmap given a reference and library sequence."""
    # Define reference signal as the signal of the reference variant if
    # refSignal is not provided
    if refSignal is None:
        refSignal = data[refVariant]

    # Normalize data to reference signal if normToRefSignal=True
    if normToRefSignal:
        data_norm = data / refSignal
    else:
        data_norm = data

    # Generate the double mutant matrix
    doubleMutantSignals, mutantLabels = doubleMutantMatrix(data_norm, refVariant,
                                                           libSeq, startPos, coop)

    # Create mask for triangular matrix if requested
    mask = np.zeros_like(doubleMutantSignals, dtype=bool)
    if triangle == 'lower':
        mask[np.tril_indices_from(mask)] = True
        mask = np.invert(mask)
    elif triangle == 'upper':
        mask[np.triu_indices_from(mask)] = True
        mask = np.invert(mask)

    # Plot the double mutant heatmap
    if cmap is None:
        cmap = RdYlBu_r2()
    ax = sns.heatmap(doubleMutantSignals, mask=mask, square=True, robust=True,
                     vmin=vmin, vmax=vmax, center=center, cmap=cmap,
                     xticklabels=mutantLabels, yticklabels=mutantLabels,
                     cbar_kws={'label': cbarLabel}, **kwargs)
    cax = plt.gcf().axes[-1]
    if invertY:
        ax.invert_yaxis()

    # Draw white lines separating the triplets
    dim = len(mutantLabels)
    for x in range(3, dim, 3):
        ax.plot([x, x], [0, dim], color='white', linewidth=linewidth)
    for y in range(3, dim, 3):
        ax.plot([0, dim], [y, y], color='white', linewidth=linewidth)

    return ax, cax
def JS_dismat(P, fill_tril=True):
    """
    Compute the distance matrix for a set of distributions P by computing
    pairwise Jensen-Shannon divergences.
    """
    # Need to replace it with a faster way
    dismat = np.zeros((P.shape[0], P.shape[0]))
    for i, j in zip(*np.triu_indices_from(dismat, k=1)):
        dismat[i, j] = JS_divergence(P[i, :], P[j, :])
    if fill_tril:
        indices = np.tril_indices_from(dismat, -1)
        dismat[indices] = dismat.T[indices]
    return dismat
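# Hedged sketch of a JS_divergence compatible with JS_dismat, built on
# scipy.spatial.distance.jensenshannon; note that scipy returns the JS
# *distance* (the square root of the divergence), hence the squaring.
import numpy as np
from scipy.spatial.distance import jensenshannon

def JS_divergence(p, q):
    return jensenshannon(p, q) ** 2

P = np.array([[0.7, 0.2, 0.1],
              [0.1, 0.6, 0.3]])
D = JS_dismat(P)  # 2 x 2 symmetric matrix with a zero diagonal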
def correlation(self):
    """
    The correlation between all combinations of trials.

    Returns
    -------
    (r, e) : tuple
        r is the mean correlation and e is the mean error of the
        correlation (with df = n_trials - 1)
    """
    c = np.corrcoef(self.input.data)
    c = c[np.tril_indices_from(c, -1)]
    return np.mean(c), stats.sem(c)
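# The core of the statistic above, restated standalone (illustrative data):
# correlate every pair of trials, keep each pair once via the strict lower
# triangle, then report the mean correlation and its standard error.
import numpy as np
from scipy import stats

trials = np.random.default_rng(1).standard_normal((4, 100))  # 4 trials
c = np.corrcoef(trials)
c = c[np.tril_indices_from(c, -1)]  # 6 unique pairwise correlations
mean_r, sem_r = np.mean(c), stats.sem(c)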
def tril_indices_from(arr, k=0):
    """Return the indices for the lower-triangle of an (n, n) array.

    See tril_indices() for full details.

    Parameters
    ----------
    arr : ndarray
        The (n, n) array for which the returned indices will be valid.
    k : int, optional
        Diagonal offset (see tril() for details).
    """
    return np.tril_indices_from(arr, k)
def slotted_autocorrelation(self, data, time, T, K,
                            second_round=False, K1=100):
    slots = np.zeros((K, 1))
    i = 1

    # make time start from 0
    time = time - np.min(time)

    # subtract mean from mag values
    m = np.mean(data)
    data = data - m

    prod = np.zeros((K, 1))
    pairs = np.subtract.outer(time, time)
    pairs[np.tril_indices_from(pairs)] = 10000000

    ks = np.int64(np.floor(np.abs(pairs) / T + 0.5))

    # We calculate the slotted autocorrelation for k=0 separately
    idx = np.where(ks == 0)
    prod[0] = ((sum(data ** 2) + sum(data[idx[0]] * data[idx[1]])) /
               (len(idx[0]) + len(data)))
    slots[0] = 0

    # We calculate it for the rest of the ks
    if second_round is False:
        for k in np.arange(1, K):
            idx = np.where(ks == k)
            if len(idx[0]) != 0:
                prod[k] = sum(data[idx[0]] * data[idx[1]]) / (len(idx[0]))
                slots[i] = k
                i = i + 1
            else:
                prod[k] = np.infty
    else:
        for k in np.arange(K1, K):
            idx = np.where(ks == k)
            if len(idx[0]) != 0:
                prod[k] = sum(data[idx[0]] * data[idx[1]]) / (len(idx[0]))
                slots[i - 1] = k
                i = i + 1
            else:
                prod[k] = np.infty
        np.trim_zeros(prod, trim='b')

    slots = np.trim_zeros(slots, trim='b')
    return prod / prod[0], np.int64(slots).flatten()
def get_matrix(self):
    """Return the current internal matrix.

    Returns
    -------
    M : ndarray, shape (n, n)
        Dense matrix containing either the Hessian or its inverse
        (depending on how `approx_type` was defined).
    """
    if self.approx_type == 'hess':
        M = np.copy(self.B)
    else:
        M = np.copy(self.H)
    li = np.tril_indices_from(M, k=-1)
    M[li] = M.T[li]
    return M
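# The mirroring idiom used by get_matrix, shown standalone: copy the strict
# lower triangle from the transpose so a matrix stored upper-triangular
# becomes fully symmetric without touching the diagonal.
import numpy as np

M = np.triu(np.arange(1.0, 10.0).reshape(3, 3))
li = np.tril_indices_from(M, k=-1)
M[li] = M.T[li]  # M is now symmetric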