def test_sample_wrong_X():
    """Check that sampling with data other than the fitted data raises.

    The sampler is fitted on the module-level ``X``/``Y`` and is then asked
    to sample a freshly generated ``(100, 40)`` array with 50 labels of each
    class; a ``RuntimeError`` is expected.
    """
    sampler = BalanceCascade(random_state=RND_SEED)
    sampler.fit(X, Y)

    # Inputs that were not the ones handed to fit().
    other_X = np.random.random((100, 40))
    other_y = np.array([0] * 50 + [1] * 50)

    assert_raises(RuntimeError, sampler.sample, other_X, other_y)
def test_sample_wrong_X():
    """Ensure ``sample`` rejects data that differs from what ``fit`` saw.

    After fitting on the module-level ``X``/``Y``, calling ``sample`` with a
    new random ``(100, 40)`` array and a 50/50 label vector must raise
    ``RuntimeError``.
    """
    cascade = BalanceCascade(random_state=RND_SEED)
    cascade.fit(X, Y)

    # Build replacement data unrelated to the arrays used for fitting.
    mismatched_X = np.random.random((100, 40))
    mismatched_y = np.array([0] * 50 + [1] * 50)

    assert_raises(
        RuntimeError,
        cascade.sample,
        mismatched_X,
        mismatched_y,
    )
def test_bc_fit():
    """Check the attributes that ``fit`` records on the sampler.

    Fits a ``BalanceCascade`` with ``ratio='auto'`` on the module-level
    ``X``/``Y`` and verifies ``min_c_``, ``maj_c_`` and the per-class counts
    stored in ``stats_c_``.
    """
    # The under-sampling ratio is left on its automatic setting.
    sampler = BalanceCascade(ratio='auto', random_state=RND_SEED)
    sampler.fit(X, Y)

    # Class labels recorded by fit().
    assert_equal(sampler.min_c_, 0)
    assert_equal(sampler.maj_c_, 1)

    # Per-class counts recorded by fit().
    assert_equal(sampler.stats_c_[0], 8)
    assert_equal(sampler.stats_c_[1], 12)
def test_bc_fit():
    """Verify the data statistics computed by ``BalanceCascade.fit``.

    The sampler is built with ``ratio='auto'`` and fitted on the module-level
    ``X``/``Y``; the test then checks ``min_c_``, ``maj_c_`` and the counts
    held in ``stats_c_`` for labels 0 and 1.
    """
    cascade = BalanceCascade(ratio='auto', random_state=RND_SEED)
    cascade.fit(X, Y)

    assert_equal(cascade.min_c_, 0)
    assert_equal(cascade.maj_c_, 1)

    # Expected number of samples per label, checked in label order.
    expected_counts = ((0, 500), (1, 4500))
    for label, count in expected_counts:
        assert_equal(cascade.stats_c_[label], count)
"""Chaining ensemble of samplers and estimators.

In ensemble classifiers, bagging methods build several estimators on
different randomly selected subsets of the data. In scikit-learn this
classifier is called BaggingClassifier. However, it does not allow each
subset to be balanced, so when it is trained on an imbalanced data set the
resulting model is biased towards the majority class.
"""
from sklearn.ensemble import BaggingClassifier
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

from imblearn.ensemble import BalancedBaggingClassifier

# X and y are expected to be defined earlier in the file.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

# Baseline: plain bagging of decision trees, with no balancing of the subsets.
bc = BaggingClassifier(base_estimator=DecisionTreeClassifier(),
                       random_state=0)
bc.fit(X_train, y_train)
y_pred = bc.predict(X_test)
print(confusion_matrix(y_test, y_pred))

# BalancedBaggingClassifier resamples each subset before training each base
# learner. In short, it combines the output of an EasyEnsemble sampler with
# an ensemble of classifiers (such as BaggingClassifier).
# NOTE(review): `base_estimator` and `ratio` are parameter names from older
# scikit-learn / imbalanced-learn releases -- confirm against pinned versions.
bbc = BalancedBaggingClassifier(base_estimator=DecisionTreeClassifier(),
                                ratio='auto',
                                replacement=False,
                                random_state=0)
# Fit on the training split only: fitting on the full (X, y) would let the
# model see X_test and make the held-out predictions below meaningless.
bbc.fit(X_train, y_train)
y_pred = bbc.predict(X_test)