def test_basetransformer_params_are_attached():
    """Check that atom's random_state is handed to added transformers.

    A transformer added with its default random_state must report atom's
    value afterwards, while an explicitly chosen value must be kept.
    Only random_state is verified here.
    """
    atom = ATOMClassifier(X_bin, y_bin, random_state=1)

    default_pca = PCA()  # random_state left to default
    atom.add(default_pca)

    custom_pca = PCA(random_state=2)  # random_state set by the user
    atom.add(custom_pca)

    # Pipeline position -> random_state expected on that step
    for position, expected_state in enumerate((1, 2)):
        step_params = atom.pipeline[position].get_params()
        assert step_params["random_state"] == expected_state
def test_add_complete_dataset():
    """Check that transformers can be applied to the complete dataset."""
    atom = ATOMClassifier(X_bin, y_bin, random_state=1)

    # After adding a scaler, the whole dataset must pass the scaling check
    atom.add(StandardScaler())
    assert check_scaling(atom.dataset)

    # After adding a pruner, the number of rows in the dataset must differ
    rows_before = len(atom.dataset)
    atom.add(Pruner())
    rows_after = len(atom.dataset)
    assert rows_after != rows_before
def test_add_train_only():
    """Check that transformers can be restricted to the training set."""
    atom = ATOMClassifier(X_bin, y_bin, random_state=1)

    # Scaling with train_only=True: train passes the check, test does not
    atom.add(StandardScaler(), train_only=True)
    assert check_scaling(atom.X_train)
    assert not check_scaling(atom.X_test)

    # Pruning with train_only=True: train size changes, test size does not
    n_train_before = len(atom.train)
    n_test_before = len(atom.test)
    atom.add(Pruner(), train_only=True)
    assert len(atom.train) != n_train_before
    assert len(atom.test) == n_test_before
def test_add_pipeline():
    """Check that adding a Pipeline adds each of its steps to atom."""
    steps = [
        ("scaler", StandardScaler()),
        ("sfm", SelectFromModel(RandomForestClassifier())),
    ]
    pipeline = Pipeline(steps=steps)

    atom = ATOMClassifier(X_bin, y_bin, random_state=1)
    atom.add(pipeline)

    # The steps must show up in atom's pipeline in their original order
    expected_types = (StandardScaler, SelectFromModel)
    for position, expected_type in enumerate(expected_types):
        assert isinstance(atom.pipeline[position], expected_type)
def test_keep_column_names():
    """Check that column names survive a transformation."""
    atom = ATOMClassifier(X_bin, y_bin, random_state=1)

    # A transformer that only changes values must leave the names untouched
    names_before = atom.columns.copy()
    atom.add(StandardScaler())
    assert atom.columns == names_before

    # A transformer that drops columns must keep the remaining names
    names_before = atom.columns.copy()
    atom.add(SelectFromModel(RandomForestClassifier()))
    for name in atom.columns:
        assert name in names_before
def test_sets_are_kept_equal():
    """Check that a row-dropping transformer shrinks both train and test."""
    atom = ATOMClassifier(X_bin, y_bin, random_state=1)

    train_size_before = len(atom.train)
    test_size_before = len(atom.test)

    atom.add(Pruner())

    # Both sets must have lost rows after pruning
    assert len(atom.train) < train_size_before
    assert len(atom.test) < test_size_before
def test_duplicate_columns_are_ignored():
    """Check that a column listed twice in `columns` is still transformed."""
    atom = ATOMClassifier(X_bin, y_bin, random_state=1)

    repeated_columns = ["mean radius", "mean radius"]
    atom.add(StandardScaler(), columns=repeated_columns)

    # The column in atom must no longer equal the untransformed input column
    transformed = atom["mean radius"]
    untouched = X_bin["mean radius"]
    is_unchanged = transformed.equals(untouched)
    assert not is_unchanged
def test_transformer_only_y():
    """Check that atom accepts a transformer that takes only a y parameter.

    After adding a LabelEncoder, every value in the "target" column must
    be either 0 or 1.
    """
    atom = ATOMClassifier(X10, y10_str, random_state=1)
    atom.add(LabelEncoder())

    target = atom["target"]
    is_zero = target == 0
    is_one = target == 1
    assert np.all(is_zero | is_one)