コード例 #1
0
def test_sample_wrong_X():
    """Check that a RuntimeError is raised when the X passed to ``sample``
    differs from the X used for fitting"""

    # Fit the sampler on the reference data
    cascade = BalanceCascade(random_state=RND_SEED)
    cascade.fit(X, Y)

    # Random (100, 40) matrix with 50 labels of each class, unrelated to
    # the data the sampler was fitted on
    other_X = np.random.random((100, 40))
    other_y = np.array([0] * 50 + [1] * 50)
    assert_raises(RuntimeError, cascade.sample, other_X, other_y)
コード例 #2
0
def test_sample_wrong_X():
    """Ensure ``sample`` raises a RuntimeError when it receives an X that
    is not the one given to ``fit``"""

    # Fitted sampler
    sampler = BalanceCascade(random_state=RND_SEED)
    sampler.fit(X, Y)

    # Fresh random features and a 50/50 label vector that were never fitted
    unseen_features = np.random.random((100, 40))
    unseen_labels = np.array([0] * 50 + [1] * 50)
    assert_raises(RuntimeError, sampler.sample, unseen_features,
                  unseen_labels)
コード例 #3
0
def test_bc_fit():
    """Check the attributes set on the sampler by ``fit``"""

    # Build the under-sampler with the 'auto' ratio and fit it
    sampler = BalanceCascade(ratio='auto', random_state=RND_SEED)
    sampler.fit(X, Y)

    # Class labels recorded during fitting
    assert_equal(sampler.min_c_, 0)
    assert_equal(sampler.maj_c_, 1)

    # Per-class entries of the statistics mapping
    for label, expected in ((0, 8), (1, 12)):
        assert_equal(sampler.stats_c_[label], expected)
コード例 #4
0
def test_bc_fit():
    """Verify that ``fit`` stores the expected data information"""

    # Under-sampler configured with the 'auto' ratio, fitted on the data
    cascade = BalanceCascade(ratio='auto', random_state=RND_SEED)
    cascade.fit(X, Y)

    # Class labels recorded during fitting
    assert_equal(cascade.min_c_, 0)
    assert_equal(cascade.maj_c_, 1)

    # Per-class entries of the statistics mapping
    expected_stats = {0: 500, 1: 4500}
    for label in (0, 1):
        assert_equal(cascade.stats_c_[label], expected_stats[label])
コード例 #5
0
'''
Chaining ensemble of samplers and estimators
In ensemble classifiers, bagging builds several estimators on different
randomly selected subsets of the data.
In scikit-learn this classifier is called BaggingClassifier. However, it
does not allow each subset to be balanced.
As a result, when trained on imbalanced data the classifier is biased
towards the majority class.
'''
from sklearn.ensemble import BaggingClassifier
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

# Hold out a test split; the seed is fixed so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

# Baseline: plain bagging of decision trees, trained on the training split.
# ``fit`` returns the estimator itself, so the call can be chained.
bc = BaggingClassifier(
    base_estimator=DecisionTreeClassifier(),
    random_state=0,
).fit(X_train, y_train)

# Evaluate on the held-out split.
y_pred = bc.predict(X_test)
print(confusion_matrix(y_test, y_pred))

'''
BalancedBaggingClassifier allows each subset to be resampled before the
corresponding base learner is trained.
In short, it combines the output of an EasyEnsemble sampler with an
ensemble classifier (such as BaggingClassifier).
'''
from imblearn.ensemble import BalancedBaggingClassifier
bbc = BalancedBaggingClassifier(base_estimator=DecisionTreeClassifier(),
                                ratio='auto',
                                replacement=False,
                                random_state=0)
# Fit on the training split only: fitting on the full (X, y) would let the
# model see the X_test rows it is evaluated on below (train/test leakage)
# and would make the result incomparable with a model fitted on
# (X_train, y_train).
bbc.fit(X_train, y_train)

# Predictions on the held-out split.
y_pred = bbc.predict(X_test)