コード例 #1
0
ファイル: spider.py プロジェクト: liusha1219/multi-imbalance
    def _clean_nn(self, x):
        """
        Remove qualifying nearest neighbors of x from both DS and RS.

        The neighbors of x are looked up once in the set returned by
        ``_ds_as_rs_union``. A neighbor is removed from ``self.DS`` and
        ``self.RS`` when its class is in ``self.majority_classes`` and is
        also among the classes returned by ``_min_cost_classes`` for x.

        :param x:
            Single observation.
        """
        for candidate in self._knn(x, self._ds_as_rs_union()):
            candidate_class = self._class_of(candidate)
            if candidate_class not in self.majority_classes:
                continue

            # The union is rebuilt for every neighbor because DS and RS may
            # have shrunk during an earlier iteration of this loop.
            min_cost_classes = self._min_cost_classes(
                x, self._ds_as_rs_union())
            if candidate_class not in min_cost_classes:
                continue

            row = np.array([candidate])
            self.DS = setdiff(self.DS, row)
            self.RS = setdiff(self.RS, row)
コード例 #2
0
ファイル: spider.py プロジェクト: liusha1219/multi-imbalance
    def _fit_resample(self, X, y):
        """
        Run the whole resampling procedure on (X, y).

        Examples and labels are merged into one table (label in the last
        column), the RS set is computed and taken out of DS, and then every
        class returned by ``_sort_by_cardinality`` is relabeled, cleaned and
        amplified in turn. Finally the AS set is merged back into DS.

        :param X:
            Numpy array of examples that is the subject of resampling.
        :param y:
            Numpy array of labels corresponding to examples from X.
        :return:
            Tuple ``(X_resampled, y_resampled)``: all but the last column of
            the final DS, and its last column.
        """
        self._initialize_algorithm(X, y)

        # One row per example, with its label appended as the last column.
        label_column = y.reshape(y.shape[0], 1)
        self.DS = np.concatenate((X, label_column), axis=1)

        self._restart_perspective()
        self._calculate_weak_majority_examples()
        self._restore_perspective()
        self.DS = setdiff(self.DS, self.RS)

        int_classes, min_classes = self._sort_by_cardinality(y)
        processing_order = int_classes + min_classes
        for current_class in processing_order:
            self.relabel(current_class)
            self.clean(current_class)
            self.amplify(current_class)

        self.DS = union(self.DS, self.AS)

        resampled = self.DS
        return resampled[:, :-1], resampled[:, -1]
コード例 #3
0
    def fit_classifier(args):
        """
        Fit one classifier on a SOUP-resampled sample and compute class weights.

        The rows of (X, y) that are absent from the sample form the out-of-bag
        set. The classifier is trained on the SOUP-resampled sample, and the
        weights are the per-class out-of-bag counts divided by the summed
        predicted probabilities (plus 0.001) on that out-of-bag set.

        :param args:
            Tuple ``(clf, X, y, resampled, maj_int_min)`` where ``resampled``
            is itself the pair ``(x_sampled, y_sampled)``.
        :return:
            Tuple ``(clf, global_weights)``. The weights fall back to all ones
            when the division raises.
        """
        clf, X, y, resampled, maj_int_min = args
        x_sampled, y_sampled = resampled

        # Glue labels to features so that whole rows can be compared.
        full_rows = np.hstack((X, y[:, None]))
        sampled_rows = np.hstack((x_sampled, y_sampled[:, None]))
        out_of_bag = setdiff(full_rows, sampled_rows)
        x_out = out_of_bag[:, :-1]
        y_out = out_of_bag[:, -1].astype(int)

        sampler = SOUP(maj_int_min=maj_int_min)
        x_resampled, y_resampled = sampler.fit_resample(x_sampled, y_sampled)
        clf.fit(x_resampled, y_resampled)

        result = clf.predict_proba(x_out)
        class_sum_prob = np.sum(result, axis=0) + 0.001

        class_quantities = Counter(y_out)
        n_classes = len(Counter(y))
        # Counts are looked up for labels 0 .. n_classes - 1.
        expected_sum_prob = np.array(
            [class_quantities[label] for label in range(n_classes)])

        try:
            global_weights = expected_sum_prob / class_sum_prob
        except Exception:
            global_weights = np.ones(shape=n_classes)
            print(
                f'Exc {Counter(y)} {Counter(y_out)} {result.shape} {expected_sum_prob.shape} {class_sum_prob.shape}'
            )

        return clf, global_weights
コード例 #4
0
def test_setdiff():
    """Check that setdiff drops from the first array the rows of the second."""
    arr1 = np.array([[1, 2, 3], [4, 5, 6]])
    arr2 = np.array([[1, 2, 3]])

    actual = setdiff(arr1, arr2)
    expected = np.array([[4, 5, 6]])

    # np.array_equal compares shapes as well as values. A plain
    # ``(actual == expected).all()`` broadcasts, so an empty result or a
    # result with duplicated rows would pass vacuously.
    assert np.array_equal(actual, expected)
コード例 #5
0
ファイル: spider.py プロジェクト: liusha1219/multi-imbalance
    def _relabel_nn(self, x):
        """
        Move qualifying nearest neighbors of x from RS to AS under x's label.

        A neighbor qualifies when it is contained in ``self.RS``, its class is
        in ``self.majority_classes`` and that class is among the classes
        returned by ``_min_cost_classes`` for x. Such a neighbor is removed
        from RS, its last element (the label) is overwritten with the last
        element of x, and the relabeled row is added to ``self.AS``.

        :param x:
            An observation.
        """
        for candidate in self._knn(x, self._ds_as_rs_union()):
            if not contains(self.RS, candidate):
                continue

            candidate_class = self._class_of(candidate)
            if candidate_class not in self.majority_classes:
                continue

            min_cost_classes = self._min_cost_classes(
                x, self._ds_as_rs_union())
            if candidate_class not in min_cost_classes:
                continue

            # Remove the row while it still carries its old label, then
            # relabel it and store the relabeled copy in AS.
            self.RS = setdiff(self.RS, np.array([candidate]))
            candidate[-1] = x[-1]
            self.AS = union(self.AS, np.array([candidate]))
コード例 #6
0
ファイル: spider.py プロジェクト: liusha1219/multi-imbalance
    def _knn(self, x, DS):
        """
        Return the k nearest neighbors of x in DS, keeping distance ties.

        x itself is removed from DS before the search. Neighbors are gathered
        in order of increasing distance, one group of equidistant examples at
        a time, until at least ``self.k`` have been collected; more than k
        rows may therefore be returned when several examples are tied.

        :param x:
            Single observation. Its last element is excluded from the
            distance computation.
        :param DS:
            Array of candidate examples, one per row. The last column is
            excluded from the distance computation.
        :return:
            The rows of DS selected as neighbors (all columns included), or
            the empty remainder of DS when it holds no example other than x.
        """
        DS = setdiff(DS, np.array([x]))
        if DS.shape[0] == 0:
            # Nothing to search in; NearestNeighbors rejects n_neighbors=0.
            return DS

        # Never ask for more neighbors than there are candidates.
        self.neigh_clf = NearestNeighbors(
            n_neighbors=min(self.k, DS.shape[0]))
        self.neigh_clf.fit(DS[:, :-1])

        query = [x[:-1]]
        # Distance to the farthest of the requested neighbors, queried once.
        kth_distance = self.neigh_clf.kneighbors(
            query, return_distance=True)[0][0][-1]
        # Slightly enlarged radius so that examples tied at the k-th distance
        # are not lost to floating-point rounding.
        radius = kth_distance + 0.0001 * kth_distance
        distances, row_ids = self.neigh_clf.radius_neighbors(
            query, radius=radius, return_distance=True)
        all_distances = distances[0]
        all_indices = row_ids[0]

        indices = []
        # np.unique returns the distinct distances already sorted ascending.
        for dist in np.unique(all_distances):
            if len(indices) >= self.k:
                break
            indices += all_indices[all_distances == dist].tolist()

        return DS[indices]