コード例 #1
0
ファイル: distance.py プロジェクト: skycontrast/geosoup
    def calc_dist_matrix(self, approach=2, verbose=False):
        """
        Calculate the pairwise euclidean distance between the rows
        (samples) of ``self.matrix`` and store the result in
        ``self.distance_matrix``.

        :param approach: which algorithm to use
            1 - apply ``Euclidean.mat_dist`` to every row of the matrix
            2 - accumulate the squared differences one dimension (column)
                at a time in a float32 buffer, then take the square root
                (default)
        :param verbose: If steps should be displayed
        :return: None (the matrix is stored in ``self.distance_matrix``)
        :raises ValueError: if ``approach`` is neither 1 nor 2
        """

        if verbose:
            Opt.cprint('Building distance matrix... ', newline='')

        if approach == 1:
            self.distance_matrix = np.apply_along_axis(
                lambda x: Euclidean.mat_dist(x, self.matrix), 1, self.matrix)

        elif approach == 2:
            # Size everything from the matrix itself rather than from
            # self.nsamp, so a stale sample count cannot cause a shape
            # mismatch between the buffer and the per-dimension differences.
            nrows = self.matrix.shape[0]
            ndims = self.matrix.shape[1]

            sq_dist = np.zeros([nrows, nrows], np.float32)

            for dim in range(ndims):
                # (nrows, 1) column minus its (1, nrows) transpose
                # broadcasts to the full (nrows, nrows) difference table.
                col = self.matrix[:, dim][:, np.newaxis]
                sq_dist += (col - col.T)**2

            self.distance_matrix = np.sqrt(sq_dist)

        else:
            raise ValueError('Unrecognized approach')

        if verbose:
            Opt.cprint('Done!')
コード例 #2
0
ファイル: distance.py プロジェクト: skycontrast/geosoup
    def proximity_filter(self, thresh=None, verbose=False):
        """
        method to remove points based on proximity threshold

        The threshold value is obtained from ``self.cluster_center`` and
        compared against the entries of ``self.distance_matrix``. Samples
        are visited in increasing order of their number of close
        neighbours; for each visited sample that is not already marked,
        every lower-indexed sample closer than the threshold (and at a
        non-zero distance) is marked for removal.

        Only ``self.samples``, ``self.nsamp`` and ``self.index`` are updated
        here; ``self.distance_matrix`` is left untouched by this method.

        :param thresh: proximity threshold (default: 90th percentile).
            Either a string containing 'percentile_', which is passed to
            ``cluster_center`` as is, or a number (valid values: 1-99),
            which is truncated to an integer and converted to
            'percentile_<n>'. Any other value falls back to the default.
        :param verbose: If steps should be displayed
        :return: None
        """
        if verbose:
            Opt.cprint('Applying proximity filter...')

        default_method = 'percentile_90'

        if thresh is None:
            method = default_method
        elif isinstance(thresh, str) and 'percentile_' in thresh:
            method = thresh
        elif (isinstance(thresh, (int, float, np.integer, np.floating))
              and not isinstance(thresh, bool)):
            # numeric thresholds select the matching percentile
            method = 'percentile_{}'.format(str(int(thresh)))
        else:
            if verbose:
                warnings.warn(
                    'Invalid thresh value.\n Using default: 90th percentile centroid vector.'
                )
            method = default_method

        thresh = self.cluster_center(method)

        # number of close proximities associated with each element
        n_proxim = np.apply_along_axis(
            lambda x: np.count_nonzero((x > 0.0) & (x < thresh)), 0,
            self.distance_matrix)

        if verbose:
            Opt.cprint('Max group size : {} '.format(str(n_proxim.max())),
                       newline='')
            Opt.cprint('Min group size : {} '.format(str(n_proxim.min())))

        # visit the indices in increasing order of n_proxim
        visit_order = np.argsort(n_proxim).tolist()

        # indices of elements that should be removed; a set keeps the
        # "already marked" check cheap and removes duplicates for free
        marked = set()

        for ii in visit_order:
            if ii in marked:
                continue

            # only elements with an index up to ii are considered
            row = self.distance_matrix[ii, 0:(ii + 1)]
            close = np.where((row < thresh) & (row > 0.0))[0]
            marked.update(close.tolist())

        # sort the indices in decreasing order for pop(), so removing one
        # element does not shift the positions of those still to be removed
        pop_idx = sorted(marked, reverse=True)

        if verbose:
            Opt.cprint('Removing {} elements...'.format(str(len(pop_idx))))

        for pop_id in pop_idx:
            self.samples.pop(pop_id)

        self.nsamp = len(self.samples)
        self.index = list(range(self.nsamp))