Beispiel #1
0
    def calc_dist_matrix(self, approach=2, verbose=False):
        """
        Method to calculate the pairwise euclidean distance between all
         samples (rows of self.matrix) and store it in self.distance_matrix
        :param approach: 1 - apply Euclidean.mat_dist(row, matrix) to every row
                         2 - accumulate squared differences one dimension
                             at a time (default)
        :param verbose: If steps should be displayed
        :return: None (the result is stored in self.distance_matrix)
        :raises ValueError: if approach is neither 1 nor 2
        """

        if verbose:
            Opt.cprint('Building distance matrix... ', newline='')

        if approach == 1:
            self.distance_matrix = np.apply_along_axis(
                lambda x: Euclidean.mat_dist(x, self.matrix), 1, self.matrix)

        elif approach == 2:
            # Size everything from the matrix itself so that the result does
            # not depend on self.nsamp being in sync with the matrix.
            nrows, ndims = self.matrix.shape

            sq_dist = np.zeros((nrows, nrows), np.float32)

            # One dimension at a time keeps the peak memory at O(nrows^2)
            # instead of O(nrows^2 * ndims).
            for dim in range(ndims):
                column = self.matrix[:, dim]
                # Broadcasting (n, 1) against (1, n) yields every pairwise
                # difference without materializing repeated copies.
                sq_dist += (column[:, np.newaxis] - column[np.newaxis, :])**2

            self.distance_matrix = np.sqrt(sq_dist)

        else:
            raise ValueError('Unrecognized approach')

        if verbose:
            Opt.cprint('Done!')
Beispiel #2
0
    def proximity_filter(self, thresh=None, verbose=False):
        """
        method to remove points based on proximity threshold.
        The threshold is obtained from self.cluster_center(); samples are
        popped from self.samples, then self.nsamp and self.index are rebuilt.
        self.distance_matrix is read but not modified here.
        :param thresh: proximity threshold (default: 90th percentile) valid values: 1-99
                       either a number or a string of the form 'percentile_<n>'.
                       Any other value falls back to the default.
        :param verbose: If steps should be displayed
        :return: None
        """
        if verbose:
            Opt.cprint('Applying proximity filter...')

        is_number = (isinstance(thresh, (int, float, np.integer, np.floating))
                     and not isinstance(thresh, bool))

        if thresh is None:
            thresh = self.cluster_center('percentile_90')
        elif isinstance(thresh, str) and 'percentile_' in thresh:
            thresh = self.cluster_center(thresh)
        elif is_number:
            # numeric percentile, e.g. 75 -> 'percentile_75'
            thresh = self.cluster_center('percentile_{}'.format(int(thresh)))
        else:
            if verbose:
                warnings.warn(
                    'Invalid thresh value.\n Using default: 90th percentile centroid vector.'
                )
            thresh = self.cluster_center('percentile_90')

        # number of close proximities associated with each element
        n_proxim = np.apply_along_axis(
            lambda x: np.count_nonzero((x > 0.0) & (x < thresh)), 0,
            self.distance_matrix)

        if verbose:
            Opt.cprint('Max group size : {} '.format(str(n_proxim.max())),
                       newline='')
            Opt.cprint('Min group size : {} '.format(str(n_proxim.min())))

        # visit the elements in increasing order of n_proxim
        visit_order = np.argsort(n_proxim).tolist()

        # find indices of elements that should be removed
        removed = set()
        for ii in visit_order:
            if ii in removed:
                continue
            # only look at neighbours with an index up to and including ii
            row = self.distance_matrix[ii, 0:(ii + 1)]
            close = np.where((row < thresh) & (row > 0.0))[0]
            removed.update(close.tolist())

        # sort the indices in decreasing order so pop() does not shift
        # the positions of elements that are still to be removed
        pop_idx = sorted(removed, reverse=True)

        if verbose:
            Opt.cprint('Removing {} elements...'.format(str(len(pop_idx))))

        for pop_id in pop_idx:
            self.samples.pop(pop_id)

        self.nsamp = len(self.samples)
        self.index = list(range(self.nsamp))
Beispiel #3
0
    def selection(self,
                  index_list):
        """
        Method to select samples based on an index list
        :param index_list: list, tuple or numpy array used to index the rows
                           of self.x and the elements of self.y
        :return: Samples object holding the selected rows
        """
        if index_list is None or isinstance(index_list, (list, tuple)):
            index_list = np.array(Opt.__copy__(index_list))

        samp = Samples()
        samp.x_name = self.x_name
        samp.y_name = self.y_name
        samp.x = self.x[index_list, :]
        samp.y = self.y[index_list]

        # the sample count must describe the selection, not the parent object
        samp.nsamp = samp.x.shape[0]
        samp.nfeat = self.x.shape[1]
        samp.index = np.arange(0, samp.nsamp)

        # NOTE(review): the bounds below are computed from the parent arrays
        # (self.x / self.y), and 'initial' takes part in the reduction, so
        # xmin is never above -max_allow_x and xmax never below max_allow_x.
        # Left unchanged - confirm this is the intended behaviour.
        samp.xmin = self.x.min(0, initial=-self.max_allow_x)
        samp.xmax = self.x.max(0, initial=self.max_allow_x)

        samp.ymin = self.y.min(initial=-self.max_allow_y)
        samp.ymax = self.y.max(initial=self.max_allow_y)

        return samp
Beispiel #4
0
    def polygon_bound_grid(coords_list, div=10, intersect_check=False):
        """
        Method to get square grid intersecting a polygon
        This function only accepts a list of coordinates: [[x1,y1],[x2,y2],...]
        :param coords_list: list of coordinates: [[x1,y1],[x2,y2],...]
        :param div: Number of divisions along x or y (default: 10)
        :param intersect_check: If only the intersecting coordinates should be returned
        :return: List of list of coordinates (square), each one a closed ring
                 of five [x, y] pairs
        """

        temp_coords_list = Opt.__copy__(coords_list)

        # close the ring if the last vertex does not repeat the first one
        first_pt, last_pt = temp_coords_list[0], temp_coords_list[-1]
        if last_pt[0] != first_pt[0] or last_pt[1] != first_pt[1]:
            temp_coords_list.append(first_pt)

        bounds_wkt = Vector.wkt_from_coords(temp_coords_list,
                                            geom_type='polygon')
        bounds_geom = Vector.get_osgeo_geom(bounds_wkt)

        x_values = [coord[0] for coord in temp_coords_list]
        y_values = [coord[1] for coord in temp_coords_list]

        bounds_minx, bounds_maxx = min(x_values), max(x_values)
        bounds_miny, bounds_maxy = min(y_values), max(y_values)

        xcoords = Sublist.frange(bounds_minx, bounds_maxx, div=div)

        # list.reverse() works in place and returns None, so build a reversed
        # copy instead of assigning the result of .reverse()
        ycoords = list(Sublist.frange(bounds_miny, bounds_maxy, div=div))[::-1]

        geom_list = list()

        for i in range(len(xcoords) - 1):
            for j in range(len(ycoords) - 1):
                geom_list.append([[xcoords[i], ycoords[j]],
                                  [xcoords[i + 1], ycoords[j]],
                                  [xcoords[i + 1], ycoords[j + 1]],
                                  [xcoords[i], ycoords[j + 1]],
                                  [xcoords[i], ycoords[j]]])

        if not intersect_check:
            return geom_list

        # keep only the cells whose geometry intersects the bounding polygon
        intersecting = list()
        for geom_coords in geom_list:
            cell_wkt = Vector.wkt_from_coords(geom_coords, geom_type='polygon')
            cell_geom = Vector.get_osgeo_geom(cell_wkt)
            if cell_geom.Intersects(bounds_geom):
                intersecting.append(geom_coords)

        return intersecting
Beispiel #5
0
    def format_data(self):
        """
        Method to package the samples for the RF model fit method.
        When self.columns is set, only those feature columns and their
        names are used; otherwise all of self.x and self.x_name are used.
        :return: dictionary with the keys 'features', 'labels',
                 'label_name' and 'feature_names' (all copies)
        """
        if self.columns is None:
            # no column subset requested: use every feature
            features = self.x
            feature_names = self.x_name
        else:
            selected = list(self.columns.tolist())
            features = self.x[:, self.columns]
            feature_names = [self.x_name[col] for col in selected]

        formatted = dict()
        formatted['features'] = features.copy()
        formatted['labels'] = self.y.copy()
        formatted['label_name'] = Opt.__copy__(self.y_name)
        formatted['feature_names'] = Opt.__copy__(feature_names)

        return formatted