예제 #1
0
def test_basetransformer_params_are_attached():
    """Assert that atom's random_state reaches transformers left at default.

    The first PCA is added without a random_state and must report atom's
    value (1); the second sets its own value (2), which must be kept.
    Only random_state is checked here.
    """
    trainer = ATOMClassifier(X_bin, y_bin, random_state=1)
    trainer.add(PCA())  # Default: expected to take atom's random_state
    trainer.add(PCA(random_state=2))  # Explicit: expected to stay as given

    # Position in the pipeline -> random_state the step should report
    for position, expected_state in enumerate((1, 2)):
        params = trainer.pipeline[position].get_params()
        assert params["random_state"] == expected_state
예제 #2
0
def test_add_complete_dataset():
    """Assert that transformers added without options act on the full dataset.

    A scaler must leave the complete dataset passing check_scaling, and
    adding a Pruner afterwards must change the number of rows.
    """
    trainer = ATOMClassifier(X_bin, y_bin, random_state=1)

    # Step 1: scaling is verified over the whole dataset
    trainer.add(StandardScaler())
    assert check_scaling(trainer.dataset)

    # Step 2: the row count before and after pruning must differ
    rows_before = len(trainer.dataset)
    trainer.add(Pruner())
    rows_after = len(trainer.dataset)
    assert rows_after != rows_before
예제 #3
0
def test_add_train_only():
    """Assert that train_only=True restricts a transformer to the train set.

    After scaling, only X_train may pass check_scaling. After pruning,
    only the train set may change in length.
    """
    trainer = ATOMClassifier(X_bin, y_bin, random_state=1)

    # Scaling: the train features pass the check, the test features do not
    trainer.add(StandardScaler(), train_only=True)
    assert check_scaling(trainer.X_train)
    assert not check_scaling(trainer.X_test)

    # Pruning: the train length changes, the test length is untouched
    train_rows = len(trainer.train)
    test_rows = len(trainer.test)
    trainer.add(Pruner(), train_only=True)
    assert len(trainer.train) != train_rows
    assert len(trainer.test) == test_rows
예제 #4
0
def test_add_pipeline():
    """Assert that a Pipeline passed to add is unpacked into its steps.

    The first two entries of atom's pipeline must be the scaler and the
    feature selector, in the order they were declared.
    """
    named_steps = [
        ("scaler", StandardScaler()),
        ("sfm", SelectFromModel(RandomForestClassifier())),
    ]
    sk_pipeline = Pipeline(steps=named_steps)

    trainer = ATOMClassifier(X_bin, y_bin, random_state=1)
    trainer.add(sk_pipeline)

    first_step, second_step = trainer.pipeline[0], trainer.pipeline[1]
    assert isinstance(first_step, StandardScaler)
    assert isinstance(second_step, SelectFromModel)
예제 #5
0
def test_keep_column_names():
    """Assert that transformers do not introduce new column names.

    A scaler must leave the column names exactly as they were, and a
    feature selector may only leave names that existed before it ran.
    """
    trainer = ATOMClassifier(X_bin, y_bin, random_state=1)

    # Pure transformation: the names must be identical afterwards
    names_before_scaling = trainer.columns.copy()
    trainer.add(StandardScaler())
    assert trainer.columns == names_before_scaling

    # Feature selection: every remaining name must already have existed
    names_before_selection = trainer.columns.copy()
    trainer.add(SelectFromModel(RandomForestClassifier()))
    unexpected = [
        name for name in trainer.columns
        if name not in names_before_selection
    ]
    assert not unexpected
예제 #6
0
def test_sets_are_kept_equal():
    """Assert that a Pruner on the whole dataset shrinks both data sets.

    Both the train and the test set must end up with fewer rows than
    they had before the Pruner was added.
    """
    trainer = ATOMClassifier(X_bin, y_bin, random_state=1)
    train_rows = len(trainer.train)
    test_rows = len(trainer.test)

    trainer.add(Pruner())

    assert len(trainer.train) < train_rows
    assert len(trainer.test) < test_rows
예제 #7
0
def test_duplicate_columns_are_ignored():
    """Assert that listing a column twice still gets it transformed.

    The same column name is passed twice to the scaler; afterwards the
    column in atom must differ from the original input column.
    """
    column = "mean radius"
    trainer = ATOMClassifier(X_bin, y_bin, random_state=1)
    trainer.add(StandardScaler(), columns=[column, column])

    unchanged = trainer[column].equals(X_bin[column])
    assert not unchanged
예제 #8
0
def test_transformer_only_y():
    """Assert that a transformer acting only on y can be added to atom.

    After adding a LabelEncoder, every value in the "target" column must
    be either 0 or 1.
    """
    trainer = ATOMClassifier(X10, y10_str, random_state=1)
    trainer.add(LabelEncoder())

    target = trainer["target"]
    is_binary_label = (target == 0) | (target == 1)
    assert np.all(is_binary_label)