def Hartigans(k, data):
    # Hartigan's k-means: start from a random assignment, then move each point to the
    # cluster that minimises the total within-cluster sum of squared distances.
    # Assumes `import time as tm`; time.clock() was removed in Python 3.8, so
    # perf_counter() is used here instead.
    start = tm.perf_counter()
    # append a random cluster label as a third column
    Ct = np.hstack((data, np.reshape(np.random.choice(range(0, k), data.shape[0], replace=True),
                                     (data.shape[0], 1))))
    meu = (npi.group_by(Ct[:, 2]).mean(Ct))[1][:, 0:2]
    Converged = False
    while Converged is False:
        Converged = True
        for j in range(0, Ct.shape[0]):
            Cj = Ct[j, 2]
            dmin = []
            for i in range(0, k):
                # tentatively assign point j to cluster i and score the full partition
                Ct[j, 2] = i
                G = (npi.group_by(Ct[:, 2])).split(Ct)
                dist = 0
                #print(G)
                for p in range(0, k):
                    t = (G[p][:, 0:2])
                    mi = np.reshape(np.mean(t, axis=0), (1, 2))
                    t = np.sum((t - mi) ** 2, axis=1)
                    dist = dist + np.sum(t, axis=0)
                dmin.append(dist)
            Cw = np.argmin(dmin)
            if Cw != Cj:
                Converged = False
                Ct[j, 2] = Cw
                meu = (npi.group_by(Ct[:, 2]).mean(Ct))[1][:, 0:2]
            else:
                Ct[j, 2] = Cj
    end = tm.perf_counter()
    return Ct, np.hstack((meu, np.reshape(np.array(list(range(k))), (k, 1)))), end - start
def cascade_predict(X_out, detect, role, detect_label):
    # # refit both detect and role
    # detectparams = detect.best_estimator_.get_params()
    # roleparams = role.best_estimator_.get_params()
    # re_detect = detect.best_estimator_.set_params(**detectparams)
    # re_role = role.best_estimator_.set_params(**roleparams)
    #
    # detect = re_detect.fit(X_in, y_in)
    # role = re_role.fit(X_in, y_in)

    # predict holdout for detection y_pred_detect
    y_pred_dtct = detect.predict(X_out).astype(int)

    # split off positive and negs from y and X in y_pred_detect_minus1, y_pred_detect_1
    unique_dtct, idx_groups_dtct = npi.group_by(y_pred_dtct, np.arange(len(y_pred_dtct)))
    pred_dtct_idc = dict(zip(unique_dtct, idx_groups_dtct))
    dtct_idc = pred_dtct_idc[detect_label]
    X_dtct = X_out[dtct_idc]

    # predict X_1: return y_pred_1_pred
    y_pred_role = role.predict(X_dtct).astype(int)
    unique_role, idx_groups_role = npi.group_by(y_pred_role, np.arange(len(y_pred_role)))
    pred_role_idc = dict(zip(unique_role, idx_groups_role))

    # reconstruct full y_pred
    y_pred = reconstruct_list([pred_dtct_idc, pred_role_idc], filter_label=[detect_label])
    return y_pred
def grouper(x, y):
    ''' Assumed sorted by speed '''
    result = npi.group_by(x).mean(y)
    sdev = npi.group_by(x).std(y)
    return result, sdev
def _group_sample_statistics(self):
    r"""
    Computes group summary statistics (mean, number of observations, and variance),
    for use when performing analysis of variance.

    Returns
    -------
    group_stats : dict
        Dictionary containing each group's mean, number of observations and variance.

    """
    group_means = npi.group_by(self.design_matrix[:, 0], self.design_matrix[:, 1], np.mean)
    group_obs = npi.group_by(self.design_matrix[:, 0], self.design_matrix[:, 1], len)
    group_variance = npi.group_by(self.design_matrix[:, 0], self.design_matrix[:, 1], var)
    groups = len(np.unique(self.design_matrix[:, 0]))

    group_stats = {
        'Group Means': group_means,
        'Group Observations': group_obs,
        'Group Variance': group_variance,
        'Number of Groups': groups
    }

    return group_stats
def _mse(self):
    r"""
    Calculates the Mean Square Error for use in computing Tukey's HSD.

    Returns
    -------
    mse : float
        The Mean Square Error of the design.

    Notes
    -----
    The MSE is found by dividing the sum of squared errors (SSE) of the design by the
    total number of observations minus the number of groups.

    .. math::

        MSE = \frac{SSE}{(N - k)}

    """
    group_variance = npi.group_by(self.design_matrix[:, 0], self.design_matrix[:, 1], var)
    group_n = npi.group_by(self.design_matrix[:, 0], self.design_matrix[:, 1], len)

    sse = 0

    for i, j in zip(group_n, group_variance):
        sse += (i[1] - 1) * j[1]

    mse = sse / (self.n - self.k)

    return mse
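# --- hedged usage sketch (not from the original module; data made up for the example) ---
# A minimal standalone illustration of the pooled MSE = SSE / (N - k) that _mse computes,
# assuming only numpy and numpy_indexed are available.
import numpy as np
import numpy_indexed as npi

labels = np.array(['a', 'a', 'a', 'b', 'b', 'b'])
obs = np.array([1.0, 2.0, 3.0, 2.0, 4.0, 6.0])

groups = npi.group_by(labels).split(obs)                   # list of per-group arrays
sse = sum(((g - g.mean()) ** 2).sum() for g in groups)     # within-group sum of squares
mse = sse / (len(obs) - len(groups))                       # SSE / (N - k) -> 2.5 here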
def double_crossover(parent_1, parent_2):
    """This function creates two children with the same sizes but swapped
    (len(p1) == len(ch2) and so on)."""
    row1, col1 = parent_1.shape
    row2, col2 = parent_2.shape
    row = np.min([row1, row2])
    rowt1 = random.randrange(1, row - 1 if row > 2 else row)
    rowt2 = random.randrange(1, row - 1 if row > 2 else row)
    #print(rowt1,rowt2)
    child_1 = np.concatenate((parent_1[:rowt1, :],
                              parent_2[rowt1:rowt2, :],
                              parent_1[rowt2:, :]), axis=0)
    child_2 = np.concatenate((parent_2[:rowt1, :],
                              parent_1[rowt1:rowt2, :],
                              parent_2[rowt2:, :]), axis=0)
    """Composing children from both parents can create duplicated rows (rows sharing the
    same first-column key); these are removed with numpy_indexed's group_by."""
    _, child_1 = npi.group_by(child_1[:, 0]).min(child_1)
    _, child_2 = npi.group_by(child_2[:, 0]).max(child_2)
    return child_1, child_2
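# --- hedged illustration (made-up rows, not from the original GA code) ---
# The dedup step above collapses all rows sharing a first-column key into a single row:
# group_by(...).min takes the column-wise minimum within each key group, .max the maximum.
import numpy as np
import numpy_indexed as npi

child = np.array([[1, 10],
                  [2, 20],
                  [1,  5]])
_, deduped = npi.group_by(child[:, 0]).min(child)
# deduped -> [[1, 5], [2, 20]]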
def make_Xy_cat(X, y, task):
    X_cat, y_cat = npi.group_by(X).mean(y)
    if task == 'quantile':
        y_cat = npi.group_by(X).split(y)
        y_cat = np.array([np.quantile(yc, q=0.75) for yc in y_cat])
    return X_cat, y_cat
def mode(self, values, weights=None):
    """compute the mode within each group.

    Parameters
    ----------
    values : array_like, [keys, ...]
        values to compute the mode of per group
    weights : array_like, [keys], float, optional
        optional weight associated with each entry in values

    Returns
    -------
    unique: ndarray, [groups]
        unique keys
    reduced : ndarray, [groups, ...]
        value array, reduced over groups
    """
    if weights is None:
        unique, weights = npi.count((self.index.sorted_group_rank_per_key, values))
    else:
        unique, weights = npi.group_by((self.index.sorted_group_rank_per_key, values)).sum(weights)
    x, bin = npi.group_by(unique[0]).argmax(weights)
    return x, unique[1][bin]
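# --- hedged usage sketch (hypothetical data, not from the original source) ---
# Calling the public GroupBy.mode that the method above implements: it returns the most
# frequent value within each key group.
import numpy as np
import numpy_indexed as npi

keys = np.array([0, 0, 0, 1, 1, 1, 1])
vals = np.array([3, 3, 5, 7, 2, 7, 7])
unique, modes = npi.group_by(keys).mode(vals)
# unique -> [0, 1]; modes -> [3, 7]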
def predict(df):
    for col in df:
        df[col] = norm(df[col])
    y = df['time']
    x = df.drop('time', axis=1)
    config_dictionary = json.load(
        open('/home/thanasis/PycharmProjects/dionePredict/analysis.json'))
    plotter = Plotter()
    test_sizes = config_dictionary['train_size']
    plotter.setup_plot(**config_dictionary)
    scores_per_technique = {}
    for model in models:
        if model not in scores_per_technique:
            scores_per_technique[model] = list()
        for size in range(len(test_sizes)):
            x_train, x_test, y_train, y_test = train_test_split(
                x, y, test_size=test_sizes[size])
            regressor = models[model]()
            regressor.fit(x_train, y_train)
            y_pred = regressor.predict(x_test)
            score = regressor.score(x_test, y_test)
            print(models[model])
            print('MSE: ' + str(mean_squared_error(y_test, y_pred)))
            print('R2_Score: ' + str(score))
            print('-----------------------------------------------------')
            scores_per_technique[model].append([test_sizes[size] * 100, score])
    bar_pos = -1.5 * config_dictionary['width']
    pos = 0
    for technique in scores_per_technique:
        print('Plotting results for ' + technique)
        results = scores_per_technique[technique]
        print(results)
        results_numpy = np.array(results)
        config_dictionary['label'] = technique
        config_dictionary['bar_position'] = bar_pos
        config_dictionary['color'] = config_dictionary['colors'][pos]
        x_unique, y_unique = npi.group_by(results_numpy[..., 0]).mean(results_numpy[..., 1])
        x_unique, y_std = npi.group_by(results_numpy[..., 0]).std(results_numpy[..., 1])
        plotter.plot_data_using_error_bars(x_unique, y_unique, y_std, config_dictionary)
        # plotting_tool.plot_data_using_bars(x_unique, y_unique, config_dictionary)
        bar_pos += config_dictionary['width']
        pos += 1
    plotter.store_and_show(**config_dictionary)
def _mse(self):
    group_variance = npi.group_by(self.ranked_matrix[:, 0], self.ranked_matrix[:, 2], var)
    group_n = npi.group_by(self.ranked_matrix[:, 0], self.ranked_matrix[:, 2], len)

    sse = 0

    for i, j in zip(group_n, group_variance):
        sse += (i[1] - 1) * j[1]

    return sse / (self.n - self.k)
def _group_comparison(self):
    r"""
    Constructs a pandas DataFrame containing the test results and group comparisons
    as found by Tukey's HSD test.

    Returns
    -------
    groups : array-like
        pandas DataFrame of group comparison results.

    """
    group_means = npi.group_by(self.design_matrix[:, 0], self.design_matrix[:, 1], np.mean)
    group_means = [i for _, i in group_means]

    group_mean_differences = np.array(list(combinations(group_means, 2)))[:, 0] - \
                             np.array(list(combinations(group_means, 2)))[:, 1]

    group_sd = npi.group_by(self.design_matrix[:, 0], self.design_matrix[:, 1], std_dev)
    group_sd = [i for _, i in group_sd]

    group_names = np.unique(self.design_matrix[:, 0])

    groups = pd.DataFrame(np.array(list(combinations(group_names, 2))))

    groups['groups'] = groups[0] + ' - ' + groups[1]
    groups['group means'] = group_means
    groups['mean difference'] = group_mean_differences
    groups['std_dev'] = group_sd

    groups['significant difference'] = np.where(np.abs(groups['mean difference']) >= self.hsd,
                                                True, False)

    groups['upper interval'] = groups['mean difference'] + self.tukey_q_value * \
                               np.sqrt(self.mse / 2. * (2. / (self.n / self.k)))
    groups['lower interval'] = groups['mean difference'] - self.tukey_q_value * \
                               np.sqrt(self.mse / 2. * (2. / (self.n / self.k)))

    q_values = groups['mean difference'] / group_sd

    groups['p_adjusted'] = psturng(np.absolute(q_values), self.n / self.k, self.dof)

    del groups[0]
    del groups[1]

    return groups
def _getPitParams(self, hl, nbpits):
    """
    Define depression global parameters:

    - volume of each depression
    - maximum filled depth

    :arg hl: numpy array of unfilled surface elevation
    :arg nbpits: number of depressions in the global mesh
    """

    # Get pit parameters (volume and maximum filled elevation)
    ids = self.inIDs == 1
    grp = npi.group_by(self.pitIDs[ids])
    uids = grp.unique
    _, vol = grp.sum((self.lFill[ids] - hl[ids]) * self.larea[ids])
    _, hh = grp.max(self.lFill[ids])
    _, dh = grp.max(self.lFill[ids] - hl[ids])

    totv = np.zeros(nbpits, dtype=np.float64)
    hmax = -np.ones(nbpits, dtype=np.float64) * 1.0e8
    diffh = np.zeros(nbpits, dtype=np.float64)

    ids = uids > -1
    totv[uids[ids]] = vol[ids]
    hmax[uids[ids]] = hh[ids]
    diffh[uids[ids]] = dh[ids]

    MPI.COMM_WORLD.Allreduce(MPI.IN_PLACE, totv, op=MPI.SUM)
    MPI.COMM_WORLD.Allreduce(MPI.IN_PLACE, hmax, op=MPI.MAX)
    MPI.COMM_WORLD.Allreduce(MPI.IN_PLACE, diffh, op=MPI.MAX)

    self.pitParams = np.empty((nbpits, 3), dtype=np.float64)
    self.pitParams[:, 0] = totv
    self.pitParams[:, 1] = hmax
    self.pitParams[:, 2] = diffh

    return
def ATE_emp_IPW(Xg, yg, ys, dismiss):
    idx_cat = npi.group_by(Xg).split(np.arange(len(yg)))
    ATE = 0
    n_emp = 0
    cpt, cpt2 = 0, 0
    for ic in idx_cat:
        m1 = yg[ic] == 1
        m0 = yg[ic] == 0
        if dismiss:
            # Dismiss categories with only men/women
            if m1.any() and m0.any():
                ate_cat = (ys[ic][m1].mean() - ys[ic][m0].mean()) * len(ic)
                ATE += ate_cat
                n_emp += len(ic)
                cpt += 1
            else:
                cpt2 += len(ic)
        else:
            # Keep categories with only men/women
            if not m1.any():
                y1 = 0
            else:
                y1 = ys[ic][m1].mean()
            if not m0.any():
                y0 = 0
            else:
                y0 = ys[ic][m0].mean()
            ate_cat = (y1 - y0) * len(ic)
            ATE += ate_cat
            n_emp += len(ic)
    ATE = ATE / n_emp
    print(cpt2)
    return ATE
def join_windows(list_grads):
    """Join windows by averaging overlapping regions of .czar.grad files.

    https://stackoverflow.com/questions/41821539/calculate-average-of-y-values-with-different-x-values

    Parameters
    ----------
    list_grads : list
        list of Grad objects to be combined

    Returns
    -------
    new_grad : Grad
        new Grad object with xdata and ydata of combined grads
    """

    # combine all xdata and all ydata
    x, grad, allfiles = Profile._decompose_list(list_grads)

    # average the values having same x gridpoint
    x_unique, grad_mean = npi.group_by(x).mean(grad)

    # create new grad instance for joined data
    new_grad = Grad(allfiles, x_unique.flatten(), grad_mean.flatten())

    # reorder data for ascending x, then return object
    new_grad._sort_by_x()
    return new_grad
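# --- hedged illustration (made-up grid, not from the original Grad class) ---
# The core of join_windows is npi.group_by(x).mean(y): y-values that share an x
# gridpoint are averaged into a single value per unique x.
import numpy as np
import numpy_indexed as npi

x = np.array([0.0, 0.5, 0.5, 1.0, 1.0, 1.5])
y = np.array([1.0, 2.0, 4.0, 3.0, 5.0, 6.0])
x_unique, y_mean = npi.group_by(x).mean(y)
# x_unique -> [0.0, 0.5, 1.0, 1.5]; y_mean -> [1.0, 3.0, 4.0, 6.0]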
def symmetrize(self):
    # average the values having same abs(x) gridpoint
    rhs_x, rhs_y = npi.group_by(np.abs(self.xdata)).mean(self.ydata)

    # regenerate -x data from averaged values (stored in +x side)
    full_x = np.concatenate((np.flip(-rhs_x), rhs_x))
    full_y = np.concatenate((np.flip(rhs_y), rhs_y))

    # remove the -0.0 entry if it exists
    first_neg_idx = len(rhs_x) - 1
    if (rhs_x[0] == 0.0) and (full_y[first_neg_idx] == full_y[len(rhs_x)]):
        full_x = np.delete(full_x, first_neg_idx)
        full_y = np.delete(full_y, first_neg_idx)

    # compute difference before and after symmetrization
    if not np.array_equal(self.xdata, full_x):
        print(" error in subtracting pmfs before/after symmetrization"
              "\n the x-range differs here:\n " +
              str(np.setdiff1d(self.xdata, full_x)))
    else:
        subtracted = np.abs(self.ydata - full_y)
        self.errbar = subtracted

    # set data in object
    self.xdata = full_x
    self.ydata = full_y
def __init__(self, y1, y2=None, group=None, continuity=True):
    if group is None:
        self.y1 = y1
        self.y2 = y2
    else:
        if len(np.unique(group)) > 2:
            raise ValueError('there cannot be more than two groups')
        obs_matrix = npi.group_by(group, y1)
        self.y1 = obs_matrix[1][0]
        self.y2 = obs_matrix[1][1]

    self.n1 = len(self.y1)
    self.n2 = len(self.y2)
    self.n = self.n1 + self.n2

    self.continuity = continuity
    self.ranks = self._rank()
    self.u_statistic = self._u()
    self.meanrank = self._mu()
    self.sigma = self._sigma_val()
    self.z_value = self._z()
    self.p_value = self._p_val()
    self.effect_size = self._eff_size()

    self.test_summary = {
        'continuity': self.continuity,
        'U': self.u_statistic,
        'mu meanrank': self.meanrank,
        'sigma': self.sigma,
        'z-value': self.z_value,
        'effect size': self.effect_size,
        'p-value': self.p_value,
        'test description': 'Mann-Whitney U test'
    }
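# --- hedged usage sketch (toy data, not from the original test class) ---
# The two-argument form npi.group_by(keys, values) used above returns a tuple of
# (unique keys, list of value groups), which is how y1/y2 are split per group label.
import numpy as np
import numpy_indexed as npi

group = np.array(['a', 'b', 'a', 'b'])
y = np.array([1.0, 2.0, 3.0, 4.0])
unique, splits = npi.group_by(group, y)
# unique -> ['a', 'b']; splits -> [array([1., 3.]), array([2., 4.])]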
def remap_edges(self, field):
    """given a quantity computed on each triangle-edge, sum the contributions
    from each adjacent triangle"""
    edges = self.edges().reshape(-1, 3, 2)
    sorted_edges = np.sort(edges, axis=-1)
    _, field = npi.group_by(sorted_edges.reshape(-1, 2)).sum(field.flatten())
    return field
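# --- hedged sketch (toy mesh values, not from the original class) ---
# numpy_indexed groups 2D keys row-wise: after sorting each edge's vertex pair, the two
# directed copies of an undirected edge share a key, so their contributions get summed.
import numpy as np
import numpy_indexed as npi

edges = np.array([[0, 1], [1, 0], [1, 2]])    # directed edges from two triangles
field = np.array([1.0, 2.0, 5.0])             # one value per directed edge
keys = np.sort(edges, axis=-1)                # undirected edge keys
unique_edges, summed = npi.group_by(keys).sum(field)
# unique_edges -> [[0, 1], [1, 2]]; summed -> [3.0, 5.0]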
def update_intermediaries(self):
    # Count types
    self.n_i = np.zeros(self.n_cell_types)
    cell_types, counts = np.unique(self.Z, return_counts=True)
    self.n_i[cell_types] = counts

    # Reverse map each type l: list of cells having type l
    values, indices = npi.group_by(self.Z, np.arange(self.n_cells))
    self.Vi = [np.array([]) for _ in range(self.n_cell_types)]
    for i, v in enumerate(values):
        self.Vi[v] = indices[i]

    # For each cell v, number of neighbors of type k
    self.mvk = np.zeros((self.n_cells, self.n_cell_types))
    for i in range(self.n_cells):
        cell_types, counts = np.unique(self.Z[self.graph[i]], return_counts=True)
        self.mvk[i][cell_types] = counts

    # Count number of edge k->l (in l,k)
    self.ctype_ctype_counts = np.zeros((self.n_cell_types, self.n_cell_types))
    for l in range(self.n_cell_types):
        if len(self.Vi[l]):
            self.ctype_ctype_counts[l] = self.mvk[self.Vi[l]].sum(axis=0)
def _get_bandgroups_with_unique_rpc_coeffs(self) -> List[List]:
    # combine RPC coefficients of all bands in a single numpy array
    band_inds = list(range(len(self.rpc_coeffs_per_band)))
    coeffs_first_band = list(self.rpc_coeffs_per_band.values())[0]
    keys_float = [k for k in coeffs_first_band
                  if isinstance(coeffs_first_band[k], float)]
    keys_npa = [k for k in coeffs_first_band
                if isinstance(coeffs_first_band[k], np.ndarray)]

    coeffs_allbands = None
    for i, coeffdict in enumerate(self.rpc_coeffs_per_band.values()):
        coeffs_curband = np.hstack([[coeffdict[k] for k in keys_float],
                                    *(coeffdict[k] for k in keys_npa)])
        if coeffs_allbands is None:
            coeffs_allbands = np.zeros((len(band_inds), 1 + len(coeffs_curband)))
            coeffs_allbands[:, 0] = band_inds

        coeffs_allbands[i, 1:] = coeffs_curband

    # get groups of band indices where bands have the same RPC coefficients
    groups = npi.group_by(coeffs_allbands[:, 1:]).split(coeffs_allbands[:, 0])
    groups_bandinds = [group.astype(int).tolist() for group in groups]

    return groups_bandinds
def recalculate_data(data, clusters):
    calc_average_change = partial(np.apply_along_axis, func1d=gmean, axis=0)
    clustered = np.empty((clusters.max(), data.shape[1]))
    groups = npi.group_by(clusters).split(data)
    for i, clust in enumerate(groups):
        clustered[i] = calc_average_change(arr=clust)
    return clustered
def main(infile, outfile, column):
    # load data
    xdata, ydata = np.loadtxt(infile, usecols=(0, column), unpack=True)

    # round the xdata to four decimal places
    xdata = np.round(xdata, 4)

    # average the values having same abs(x) gridpoint
    rhs_x, rhs_y = npi.group_by(np.abs(xdata)).mean(ydata)

    # regenerate -x data from averaged values (stored in +x side)
    full_x = np.concatenate((np.flip(-rhs_x), rhs_x))
    full_y = np.concatenate((np.flip(rhs_y), rhs_y))

    # remove the -0.0 entry if it exists
    first_neg_idx = len(rhs_x) - 1
    if (rhs_x[0] == 0.0) and (full_y[first_neg_idx] == full_y[len(rhs_x)]):
        full_x = np.delete(full_x, first_neg_idx)
        full_y = np.delete(full_y, first_neg_idx)

    # save to file
    np.savetxt(outfile, np.column_stack((full_x, full_y)),
               delimiter='\t', fmt='%10.4f %10.10f')
def _pitInformation(self, gZ, hFill):
    """
    Function to extract the volume of all depressions based on current elevation,
    depressionless one and voronoi cell areas. It also stores the spillover vertex
    indices for each of the depressions. It is run over the global mesh.

    .. note::

        This function uses the **numpy-indexed** library which contains functionality
        for indexed operations on numpy ndarrays and provides efficient vectorized
        functionality such as grouping and set operations.

    :arg gZ: global elevation numpy array
    :arg hFill: global depressionless elevation numpy array
    """

    # Compute pit volumes
    groupPits = npi.group_by(self.pits[:, 0])
    pitNb, self.pitVol = groupPits.sum((hFill - gZ) * self.garea)
    _, outids, _ = np.intersect1d(self.pits[:, 0], pitNb, return_indices=True)
    self.outFlows = self.pits[outids, 1]

    del groupPits, pitNb, outids
    gc.collect()

    return
def best_individual(self):
    """Return the individual with the best fitness in the current generation."""
    best = self.current_generation[0]
    _, genes = npi.group_by(best.genes[:, 0]).max(best.genes)
    return (best.fitness, genes)
def add_swap(parent, meta_data):
    """This function creates a new child by adding rows and then swapping the
    last and a random row."""
    child = parent
    points = meta_data[0]
    rq_time = meta_data[1]
    msk = np.isin(points, child[:, 0])
    points_accpt = points[~msk]
    p = 1 / len(points_accpt) if len(points_accpt) > 0 else 1
    #print(points_accpt)
    while p < 1:
        #print(p)
        new_row = np.array([[np.random.choice(points_accpt, 1, p)[0], 60]])
        child = np.append(child, new_row, axis=0)
        msk = np.isin(points, child[:, 0])
        points_accpt = points[~msk]
        p = 1 / len(points_accpt) if len(points_accpt) > 0 else 1

    row, col = child.shape
    rowt = random.randrange(1, row - 1 if row > 2 else row)
    #print(rowt)
    child[rowt, 0], child[row - 1, 0] = child[row - 1, 0], child[rowt, 0]
    _, child = npi.group_by(child[:, 0]).min(child)
    return child
def add_swap(parent, meta_data):
    """This function creates a new child by adding rows and then swapping the
    last and a random row."""
    child = parent
    points = meta_data[:, 0]
    rq_time = meta_data[:, 1]
    msk = np.isin(meta_data[:, 0], child[:, 0])
    points_accpt = meta_data[~msk]
    row = len(points_accpt)
    p = 1 / row if row > 0 else 1
    #print(points_accpt)
    while p < 1:
        #print(p)
        rowAcpt = random.randrange(1, row - 1 if row > 2 else row)
        new_row = points_accpt[rowAcpt]
        try:
            child = np.vstack((child, new_row))
        except:
            print('------------------')
        msk = np.isin(meta_data[:, 0], child[:, 0])
        points_accpt = meta_data[~msk]
        row = len(points_accpt)
        p = 1 / row if row > 0 else 1

    row, col = child.shape
    rowt = random.randrange(1, row - 1 if row > 2 else row)
    #print(rowt)
    # swap rows with fancy indexing; a tuple swap of row views would copy the same row
    # into both positions because integer row indexing returns views
    child[[rowt, row - 1]] = child[[row - 1, rowt]]
    _, child = npi.group_by(child[:, 0]).min(child)
    return child
def fitness(individual, meta_data):
    _, individual = npi.group_by(individual[:, 0]).max(individual)
    # individual = set_const(individual, const)
    calc_starttime(individual)
    individual = set_const(individual, const)

    len_pln = len(individual)
    edge = len_pln - 1
    pln_pnt = individual[:, 0]
    len_points = len(points)

    all_duration = np.sum(individual[:, 1])
    end_plan = individual[edge, 3] + individual[edge, 1]
    all_dist = end_plan - all_duration

    cost_fultm = cost_fulltime(individual, end_plan)
    cost_lntm = cost_lentime(individual, all_dist, all_duration)
    cost_cnt = cost_count(individual, meta_data)
    cost_vis_time, cost_rq_time = cost_diffTime(individual)

    # print('cost_fultm: '+str(cost_fultm))
    # print('cost_lntm: '+str(cost_lntm))
    # print('cost_cnt: '+str(cost_cnt))
    # print('cost_diff_rqTime: '+str(cost_diff_rqTime))

    cost = ((coh_fultm * cost_fultm) +
            (coh_lntm * cost_lntm) +
            (coh_cnt * cost_cnt) +
            (coh_dffRqTime * cost_rq_time) +
            (coh_dffVisTime * cost_vis_time))
    # print(cost)

    # msk = np.isin(const[:,0], individual[:,0])
    # notUsed_const = const[~msk]
    # penalty = np.sum(notUsed_const[:,1]) / tot_lenTimeConst

    return cost  #*(1 + (coh_pnlty*penalty))
def _update_beta(self, X):
    #print "b_gamma 0", self.gamma_b.shape
    #print "b_rho 0", self.rho_b.shape
    self.gamma_b = self.b + npi.group_by(self.row_index).sum(self.phi_var)[1]
    self.rho_b = self.b + np.sum(self.Et, axis=0, keepdims=True)
    #print "b_gamma 1", self.gamma_b.shape
    #print "b_rho 1", self.rho_b.shape
    self.Eb, self.Elogb = _compute_expectations(self.gamma_b, self.rho_b)
def visualize_gridsearch(results, pipe_step, parameter):
    plt.figure(figsize=(13, 13))
    plt.title("Evaluation of the Parameter %s" % (parameter), fontsize=16)

    plt.xlabel(parameter)
    plt.ylabel("Average Score")
    plt.grid()
    ax = plt.axes()

    # Get the regular numpy array from the MaskedArray
    X_axis = np.array(results['param_%s' % (pipe_step)].data, dtype=float)

    for sample, style in (('train', '--'), ('test', '-')):
        x_unique, sample_score_mean = npi.group_by(X_axis).mean(
            results['mean_%s_score' % (sample)])
        x_unique, sample_score_std = npi.group_by(X_axis).mean(
            results['std_%s_score' % (sample)])
        ax.fill_between(x_unique,
                        sample_score_mean - sample_score_std,
                        sample_score_mean + sample_score_std,
                        alpha=0.1 if sample == 'test' else 0)
        ax.plot(x_unique, sample_score_mean, style,
                alpha=1 if sample == 'test' else 0.7,
                label=sample)

    best_index = np.nonzero(results['rank_test_score'] == 1)[0][0]
    best_score = results['mean_test_score'][best_index]

    # Plot a dotted vertical line at the best score for that scorer marked by x
    ax.plot([X_axis[best_index], ] * 2, [0, best_score],
            linestyle='-.', marker='x', markeredgewidth=3, ms=8)

    # Annotate the best score for that scorer
    ax.annotate("%0.2f" % best_score, (X_axis[best_index], best_score + 0.005))

    plt.legend(loc="best")
    plt.show()
def _update_theta(self, X):
    #print "t_gamma 0", self.gamma_t.shape
    #print "t_rho 0", self.rho_t.shape
    self.gamma_t = self.a + npi.group_by(self.cols_index).sum(self.phi_var)[1]
    self.rho_t = self.a + np.sum(self.Eb, axis=0, keepdims=True)
    #print "t_gamma 1", self.gamma_t.shape
    #print "t_rho 1", self.rho_t.shape
    self.Et, self.Elogt = _compute_expectations(self.gamma_t, self.rho_t)
def matheron(self, bin_type="auto", bins=10, var=False):  # should add binning with constant number of values
    """
    Calculate Matheron variogram for points and field values previously fed into
    variogram. A few options for specifying binning exist.

    Parameters
    ----------
    bin_type : str
        Descriptor of the format of data passed into the bin parameter. Can be one of:

        * "auto" : select bounds to be (0, self.range[1]/2), bin centers will be
          calculated accordingly based on user given number of bins.
        * "lin" : bin boundaries will be linearly spaced based on a user given minima,
          maxima and number of bins. Bin centers will not fall on given maxima and minima.
        * "bound" : bounds will be completely given by the user as a numpy array

    bins : int, list, array-like
        Description of how binning will be performed in Matheron variogram, specific
        formats given below for each bin type.

        * "auto" : int giving number of bins to use
        * "lin" : list containing three entries. First is minima of bin boundaries,
          second is maxima of bin bounds and third is the number of bins to use as int
        * "bound" : array-like object specifying boundaries of bins. Number of bins is
          length of this array - 1.

    var : bool
        Set True for bin-wise variance to be calculated and returned

    Returns
    -------
    centers : numpy.ndarray
        Bin centers used for variogram
    n_bins : numpy.ndarray
        Number of point relations used to calculate each semivariance
    v : numpy.ndarray
        Estimated semivariance values at lags corresponding to bin centers
    v_var (optional) : numpy.ndarray
        Variance associated with squared difference values within a bin
    """
    bins = self.set_bins(bin_type, bins)
    centers = bins[:-1] + np.diff(bins, 1) / 2

    b_ind = np.digitize(self.lags, bins)
    n_bins = np.bincount(b_ind - 1)[:-1]

    gp = group_by(b_ind[np.where(b_ind != bins.size)])
    _, v = gp.mean(self.diffs[np.where(b_ind != bins.size)])  # account for lags bigger than bins
    if var:
        _, v_var = gp.var(self.diffs[np.where(b_ind != bins.size)])  # account for lags bigger than bins
        return centers, n_bins, v, v_var
    else:
        return centers, n_bins, v
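# --- hedged sketch (synthetic lags/differences, not from the original class) ---
# The binning pattern above in miniature: digitize lags into bins, then group_by the bin
# indices to average the squared differences per occupied bin.
import numpy as np
from numpy_indexed import group_by

lags = np.array([0.1, 0.4, 0.6, 1.1, 1.4])
diffs = np.array([0.2, 0.3, 0.5, 1.0, 1.2])
bins = np.array([0.0, 0.5, 1.0, 1.5])

b_ind = np.digitize(lags, bins)        # bin index per lag -> [1, 1, 2, 3, 3]
_, v = group_by(b_ind).mean(diffs)     # mean squared difference per bin -> [0.25, 0.5, 1.1]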
def boundary_edges(self):
    """
    Returns
    -------
    ndarray, [3, 2**level], int
    """
    r, c = np.nonzero(self.boundary_edges_chain)
    return npi.group_by(c).split_array_as_array(r)
def boundary_vertices(self):
    """
    Returns
    -------
    ndarray : [3, 1], int
    """
    r, c = np.nonzero(self.boundary_vertices_chain)
    return npi.group_by(c).split_array_as_array(r)
def get_coset(self, orbit):
    """Compute the cosets, given a labeling of all elements describing the orbits

    Returns
    -------
    array_like, [n-index], of ndarray, [coset_size], int
        each array represents a set of elements that form a coset
    """
    n_tiles, labels = orbit
    cosets = npi.group_by(labels).split(np.arange(len(labels)))
    return cosets
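# --- hedged illustration (toy labels, not from the original group code) ---
# group_by(labels).split(indices) partitions element indices by their orbit label,
# which is how the cosets above are formed.
import numpy as np
import numpy_indexed as npi

labels = np.array([0, 1, 0, 2, 1])
cosets = npi.group_by(labels).split(np.arange(len(labels)))
# cosets -> [array([0, 2]), array([1, 4]), array([3])]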
def boundary_info(self):
    """Return terms describing how the triangle boundary stitches together

    Returns
    -------
    vertices : ndarray, [n_terms], int
        the vertex index this boundary term applies to
        single number for edge vertices; multiple entries for corner vertices
    quotient : ndarray, [n_terms], int
        relative element in quotient group to reach opposing element
        how current index relates to other side of the term
    neighbor : ndarray, [n_terms], int
        relative element in quotient group to reach opposing element
        how current index relates to other side of the term
    sub : ndarray, [n_terms], int
        relative subgroup transform. only needed by normal transformation so far
        to get transformations
    """
    # incidence tables; vi is referenced below, so it must stay defined
    vi = self.group.vertex_incidence  # [n_vertex_entries, 4]
    ei = self.group.edge_incidence    # [n_edge_entries, 4]

    # these are the vertex indices for all edges and corners of the triangle
    bv = self.triangle.boundary_vertices       # [3, 1]
    be = self.triangle.boundary_edge_vertices  # [3, n_boundary_edges]

    def broadcast(a, b):
        shape = len(b), len(a), 3
        a = np.broadcast_to(a[None], shape)
        b = np.broadcast_to(b[:, None], shape[:-1])
        return np.concatenate([b.reshape(-1, 1), a.reshape(-1, 3)], axis=1)

    v = [broadcast(a, b)
         for a, b in zip(npi.group_by(vi[:, 0]).split(vi[:, 1:]), bv)]
    e = [broadcast(a, b)
         for a, b in zip(npi.group_by(ei[:, 0]).split(ei[:, 1:]), be)]

    return np.concatenate(v + e, axis=0)
def self_intersect(self):
    """
    test curve of arc-segments for intersection
    raises exception in case of intersection
    alternatively, we might resolve intersections by point insertion
    but this is unlikely to have any practical utility, and more likely to be annoying
    """
    vertices = self.vertices
    faces = self.faces
    tree = KDTree(vertices)

    # curve points per edge, [n, 2, 3]
    cp = util.gather(faces, vertices)
    # normal rotating end unto start
    normal = util.normalize(np.cross(cp[:, 0], cp[:, 1]))
    # midpoints of edges; [n, 3]
    mid = util.normalize(cp.sum(axis=1))
    # vector from end to start, [n, 3]
    diff = np.diff(cp, axis=1)[:, 0, :]
    # radius of sphere needed to contain edge, [n]
    radius = np.linalg.norm(diff, axis=1) / 2 * 1.01

    # FIXME: this can be vectorized by adapting pinv
    projector = [np.linalg.pinv(q) for q in np.swapaxes(cp, 1, 2)]

    # incident[vertex_index] gives a list of all incident edge indices
    incident = npi.group_by(faces.flatten(), np.arange(faces.size))

    def intersect(i, j):
        """test if spherical line segments intersect. pretty elegant"""
        # intersection direction of two great circles; sign may go either way though!
        intersection = np.cross(normal[i], normal[j])
        # this direction must lie within the cone spanned by both sets of endpoints
        return all(np.prod(np.dot(projector[e], intersection)) > 0 for e in (i, j))

    for ei, (p, r, cidx) in enumerate(zip(mid, radius, faces)):
        V = [v for v in tree.query_ball_point(p, r) if v not in cidx]
        edges = np.unique([ej for v in V for ej in incident[v]])
        for ej in edges:
            if len(np.intersect1d(faces[ei], faces[ej])) == 0:  # does not count if edges touch
                if intersect(ei, ej):
                    raise Exception('The boundary curves intersect. '
                                    'Check your geometry and try again')
def stitch_groups(self):
    info = self.boundary_info
    groups = npi.group_by(info[:, :2])
    return groups