def calc_dist_matrix(self, approach=2, verbose=False):
    """
    Method to calculate the euclidean distance between every pair of rows
    of self.matrix. The result is stored in self.distance_matrix.

    :param approach: 1 - apply Euclidean.mat_dist to every row of self.matrix
                     2 - accumulate squared differences one dimension
                         at a time into a float32 matrix (default)
    :param verbose: If steps should be displayed
    :return: None
    :raises ValueError: if approach is neither 1 nor 2
    """
    if verbose:
        Opt.cprint('Building distance matrix... ', newline='')

    if approach == 1:
        self.distance_matrix = np.apply_along_axis(
            lambda x: Euclidean.mat_dist(x, self.matrix), 1, self.matrix)

    elif approach == 2:
        # Take both sizes from the matrix itself rather than self.nsamp, so
        # the accumulator and the per-dimension differences always agree.
        nrows = self.matrix.shape[0]
        ndims = self.matrix.shape[1]

        sq_dist = np.zeros([nrows, nrows], np.float32)

        for dim in range(ndims):
            col = self.matrix[:, dim]
            # Broadcasting a column against a row gives every pairwise
            # difference without materialising two repeated matrices.
            sq_dist += (col[:, np.newaxis] - col[np.newaxis, :]) ** 2

        self.distance_matrix = np.sqrt(sq_dist)

    else:
        raise ValueError('Unrecognized approach')

    if verbose:
        Opt.cprint('Done!')
def proximity_filter(self, thresh=None, verbose=False):
    """
    method to remove points based on proximity threshold

    The threshold is looked up through self.cluster_center and compared
    against the entries of self.distance_matrix. Samples are popped from
    self.samples in place, then self.nsamp and self.index are rebuilt.

    :param thresh: proximity threshold (default: 90th percentile)
                   None              - uses 'percentile_90'
                   'percentile_<n>'  - passed to self.cluster_center as is
                   number            - truncated to int and used as
                                       'percentile_<n>' (valid values: 1-99)
                   anything else falls back to the default
    :param verbose: If steps should be displayed
    :return: None
    """
    if verbose:
        Opt.cprint('Applying proximity filter...')

    if thresh is None:
        thresh = self.cluster_center('percentile_90')

    elif isinstance(thresh, str) and 'percentile_' in thresh:
        # The str check comes first: a substring test on a number raises
        # TypeError.
        thresh = self.cluster_center(thresh)

    elif (isinstance(thresh, (int, float, np.integer, np.floating))
          and not isinstance(thresh, bool)):
        # isinstance, not membership in (int, float): the latter only
        # matches the type objects themselves, never a numeric value.
        thresh = self.cluster_center('percentile_{}'.format(int(thresh)))

    else:
        if verbose:
            warnings.warn(
                'Invalid thresh value.\n Using default: 90th percentile centroid vector.'
            )
        thresh = self.cluster_center('percentile_90')

    # number of close proximities associated with each element
    n_proxim = np.apply_along_axis(
        lambda x: np.count_nonzero((x > 0.0) & (x < thresh)),
        0,
        self.distance_matrix)

    if verbose:
        Opt.cprint('Max group size : {} '.format(n_proxim.max()), newline='')
        Opt.cprint('Min group size : {} '.format(n_proxim.min()))

    # visit the elements in increasing order of n_proxim
    visit_order = np.argsort(n_proxim).tolist()

    # find indices of elements that should be removed; a set keeps the
    # membership test cheap and avoids de-duplicating on every iteration
    idx_out = set()
    for ii in visit_order:
        if ii in idx_out:
            continue

        # only the entries up to and including the diagonal are inspected
        arr = self.distance_matrix[ii, 0:(ii + 1)]
        close = np.where((arr < thresh) & (arr > 0.0))[0]
        idx_out.update(close.tolist())

    # sort the indices in decreasing order so that pop() does not shift
    # the positions still waiting to be removed
    pop_idx = sorted(idx_out, reverse=True)

    if verbose:
        Opt.cprint('Removing {} elements...'.format(len(pop_idx)))

    for pop_id in pop_idx:
        self.samples.pop(pop_id)

    # NOTE(review): self.matrix and self.distance_matrix are not updated
    # here, so they still describe the unfiltered samples - confirm that
    # callers rebuild them after filtering.
    self.nsamp = len(self.samples)
    self.index = list(range(self.nsamp))