def calc_dist_matrix(self, approach=2, verbose=False):
    """
    Method to calculate the pairwise euclidean distance between all rows
    of self.matrix. The result is stored in self.distance_matrix.

    :param approach: Algorithm used to build the matrix
                     1: apply Euclidean.mat_dist to every row of self.matrix
                     2: accumulate squared differences one column at a time
                        in a float32 matrix, then take the square root
                        (default)
    :param verbose: If steps should be displayed
    :return: None
    :raises ValueError: if approach is neither 1 nor 2
    """
    if verbose:
        Opt.cprint('Building distance matrix... ', newline='')

    if approach == 1:
        self.distance_matrix = np.apply_along_axis(
            lambda x: Euclidean.mat_dist(x, self.matrix), 1, self.matrix)

    elif approach == 2:
        # Size everything from the matrix itself so the result stays square
        # even when self.nsamp is out of sync with the matrix.
        nrows = self.matrix.shape[0]
        ndims = self.matrix.shape[1]

        sq_dist = np.zeros([nrows, nrows], np.float32)
        for dim in range(ndims):
            col = self.matrix[:, dim]
            # Broadcasting a column against a row gives every pairwise
            # difference for this dimension.
            sq_dist += (col[:, np.newaxis] - col[np.newaxis, :]) ** 2

        self.distance_matrix = np.sqrt(sq_dist)

    else:
        raise ValueError('Unrecognized approach')

    if verbose:
        Opt.cprint('Done!')
def proximity_filter(self, thresh=None, verbose=False):
    """
    Method to remove samples that lie too close to other samples,
    based on a proximity threshold applied to self.distance_matrix.

    Elements are visited in increasing order of their number of close
    neighbours (distances strictly between 0 and the threshold). For every
    visited element that is not already marked, all elements with an equal
    or lower index that lie within the threshold are marked for removal.
    Marked elements are popped from self.samples, and self.nsamp and
    self.index are refreshed.

    :param thresh: proximity threshold selector (default: 90th percentile)
                   None: uses 'percentile_90'
                   str containing 'percentile_': passed to
                        self.cluster_center() unchanged
                   number in 1-99: converted to 'percentile_<int>'
                   anything else: falls back to 'percentile_90'
    :param verbose: If steps should be displayed
    :return: None
    """
    if verbose:
        Opt.cprint('Applying proximity filter...')

    default_center = 'percentile_90'

    # bool is a subclass of int but is not a meaningful percentile.
    is_number = (isinstance(thresh, (int, float, np.integer, np.floating))
                 and not isinstance(thresh, bool))

    if thresh is None:
        thresh = self.cluster_center(default_center)

    elif isinstance(thresh, str) and 'percentile_' in thresh:
        thresh = self.cluster_center(thresh)

    elif is_number and 1 <= thresh <= 99:
        thresh = self.cluster_center('percentile_{}'.format(
            str(int(thresh))))

    else:
        if verbose:
            warnings.warn(
                'Invalid thresh value.\n Using default: 90th percentile centroid vector.'
            )
        thresh = self.cluster_center(default_center)

    # number of close proximities associated with each element
    n_proxim = np.apply_along_axis(
        lambda x: np.count_nonzero((x > 0.0) & (x < thresh)),
        0,
        self.distance_matrix)

    if verbose:
        Opt.cprint('Max group size : {} '.format(str(n_proxim.max())), newline='')
        Opt.cprint('Min group size : {} '.format(str(n_proxim.min())))

    # find indices of elements that should be removed, visiting the
    # elements in increasing order of n_proxim
    removed = set()
    for ii in np.argsort(n_proxim).tolist():
        if ii in removed:
            continue

        # only neighbours with index <= ii are considered
        arr = self.distance_matrix[ii, 0:(ii + 1)]
        removed.update(np.where((arr < thresh) & (arr > 0.0))[0].tolist())

    # sort the indices in decreasing order so pop() does not shift
    # the positions that are still pending
    pop_idx = sorted(removed, reverse=True)

    if verbose:
        Opt.cprint('Removing {} elements...'.format(str(len(pop_idx))))

    for pop_id in pop_idx:
        self.samples.pop(pop_id)

    # NOTE: self.distance_matrix is not modified here
    self.nsamp = len(self.samples)
    self.index = list(range(self.nsamp))
def selection(self, index_list):
    """
    Method to select samples based on an index list

    A new Samples object is built from the rows of self.x and the
    elements of self.y picked by index_list. The sample count, feature
    count, index and min/max statistics describe the selected subset.

    :param index_list: indices of the samples to select; a list, tuple or
                       None is copied and converted to a numpy array,
                       anything else is used for indexing as it is
    :return: Samples object
    """
    if index_list is None or isinstance(index_list, (list, tuple)):
        index_list = np.array(Opt.__copy__(index_list))

    samp = Samples()
    samp.x_name = self.x_name
    samp.y_name = self.y_name

    samp.x = self.x[index_list, :]
    samp.y = self.y[index_list]

    # shape and index come from the selection, not from the parent object
    samp.nsamp = samp.x.shape[0]
    samp.nfeat = samp.x.shape[1]
    samp.index = np.arange(0, samp.nsamp)

    # NOTE(review): `initial` caps the reduction, so xmin/ymin can never be
    # above -max_allow_* and xmax/ymax never below +max_allow_*. The signs
    # look inverted; kept as in the original - confirm the intended limits.
    samp.xmin = samp.x.min(0, initial=-self.max_allow_x)
    samp.xmax = samp.x.max(0, initial=self.max_allow_x)

    samp.ymin = samp.y.min(initial=-self.max_allow_y)
    samp.ymax = samp.y.max(initial=self.max_allow_y)

    return samp
def polygon_bound_grid(coords_list, div=10, intersect_check=False):
    """
    Method to get a grid of rectangular cells covering the bounding box
    of a polygon.
    This function only accepts a list of coordinates: [[x1,y1],[x2,y2],...]

    :param coords_list: list of coordinates: [[x1,y1],[x2,y2],...]
    :param div: Number of divisions along x or y (default: 10),
                passed to Sublist.frange()
    :param intersect_check: If only the cells intersecting the polygon
                            should be returned
    :return: List of cells; each cell is a closed ring of five [x, y]
             pairs (the first corner is repeated at the end)
    """
    temp_coords_list = Opt.__copy__(coords_list)

    # close the ring if the last vertex does not repeat the first
    first_pt = temp_coords_list[0]
    last_pt = temp_coords_list[-1]
    if last_pt[0] != first_pt[0] or last_pt[1] != first_pt[1]:
        temp_coords_list.append(first_pt)

    bounds_wkt = Vector.wkt_from_coords(temp_coords_list,
                                        geom_type='polygon')
    bounds_geom = Vector.get_osgeo_geom(bounds_wkt)

    x_values = [coord[0] for coord in temp_coords_list]
    y_values = [coord[1] for coord in temp_coords_list]

    bounds_minx, bounds_maxx = min(x_values), max(x_values)
    bounds_miny, bounds_maxy = min(y_values), max(y_values)

    xcoords = Sublist.frange(bounds_minx, bounds_maxx, div=div)

    # list.reverse() works in place and returns None, so the reversed
    # sequence has to be built explicitly (y runs from max to min).
    ycoords = list(Sublist.frange(bounds_miny, bounds_maxy, div=div))
    ycoords.reverse()

    geom_list = list()
    for i in range(len(xcoords) - 1):
        for j in range(len(ycoords) - 1):
            geom_list.append([[xcoords[i], ycoords[j]],
                              [xcoords[i + 1], ycoords[j]],
                              [xcoords[i + 1], ycoords[j + 1]],
                              [xcoords[i], ycoords[j + 1]],
                              [xcoords[i], ycoords[j]]])

    if not intersect_check:
        return geom_list

    # keep only the cells whose geometry intersects the polygon
    intersecting = list()
    for geom_coords in geom_list:
        cell_wkt = Vector.wkt_from_coords(geom_coords, geom_type='polygon')
        cell_geom = Vector.get_osgeo_geom(cell_wkt)
        if cell_geom.Intersects(bounds_geom):
            intersecting.append(geom_coords)

    return intersecting
def format_data(self):
    """
    Method to package the samples as a dictionary for the RF model
    fit method.

    If self.columns is set, only those feature columns (and their names)
    are included; otherwise all of self.x and self.x_name are used.
    Every value in the returned dictionary is a copy.

    :return: dictionary with keys 'features', 'labels', 'label_name'
             and 'feature_names'
    """
    if self.columns is None:
        # no column subset requested: use everything
        features = self.x
        feature_names = self.x_name
    else:
        selected = list(self.columns.tolist())
        features = self.x[:, self.columns]
        feature_names = [self.x_name[col] for col in selected]

    formatted = dict()
    formatted['features'] = features.copy()
    formatted['labels'] = self.y.copy()
    formatted['label_name'] = Opt.__copy__(self.y_name)
    formatted['feature_names'] = Opt.__copy__(feature_names)

    return formatted