def test_zero_count():
    """Ensure the TPOT _zero_count preprocessor returns its input when there are no training features.

    Only the last three columns of ``training_testing_data`` are passed in, and
    the operator's output is expected to equal that same slice.
    """
    tpot_obj = TPOT()

    # .iloc replaces the deprecated .ix indexer; the slice is purely positional.
    result = tpot_obj._zero_count(training_testing_data.iloc[:, -3:])
    expected = training_testing_data.iloc[:, -3:]

    assert np.array_equal(result, expected)
def test_rbf():
    """Assert that the TPOT RBFSampler operator returns its input when there are no training features.

    Only the last three columns of ``training_testing_data`` are passed in, and
    the operator's output is expected to equal that same slice.
    """
    tpot_obj = TPOT()

    # .iloc replaces the deprecated .ix indexer; the slice is purely positional.
    result = tpot_obj._rbf(training_testing_data.iloc[:, -3:], 0.1)
    expected = training_testing_data.iloc[:, -3:]

    assert np.array_equal(result, expected)
def test_fit():
    """Check that fitting TPOT leaves an optimized pipeline on the object.

    After a one-individual, one-generation run the optimized pipeline must be a
    ``creator.Individual`` and ``gp_generation`` must equal 0.
    """
    settings = {
        'random_state': 42,
        'population_size': 1,
        'generations': 1,
        'verbosity': 0,
    }
    optimizer = TPOT(**settings)
    optimizer.fit(training_features, training_classes)

    best_pipeline = optimizer._optimized_pipeline
    assert isinstance(best_pipeline, creator.Individual)
    assert optimizer.gp_generation == 0
def test_nystroem():
    """Assert that the TPOT Nystroem preprocessor returns its input when there are no training features.

    Only the last three columns of ``training_testing_data`` are passed in, and
    the operator's output is expected to equal that same slice.
    """
    tpot_obj = TPOT()

    # .iloc replaces the deprecated .ix indexer; the slice is purely positional.
    result = tpot_obj._nystroem(training_testing_data.iloc[:, -3:], 1, 0.1, 1)
    expected = training_testing_data.iloc[:, -3:]

    assert np.array_equal(result, expected)
def test_select_fwe():
    """Ensure the TPOT select-FWE operator returns its input when there are no training features.

    Only the last three columns of ``training_testing_data`` are passed in, and
    the operator's output is expected to equal that same slice.
    """
    tpot_obj = TPOT()

    # .iloc replaces the deprecated .ix indexer; the slice is purely positional.
    result = tpot_obj._select_fwe(training_testing_data.iloc[:, -3:], 0.005)
    expected = training_testing_data.iloc[:, -3:]

    assert np.array_equal(result, expected)
def test_fast_ica():
    """Assert that the TPOT FastICA preprocessor returns its input when there are no training features.

    Only the last three columns of ``training_testing_data`` are passed in, and
    the operator's output is expected to equal that same slice.
    """
    tpot_obj = TPOT()

    # .iloc replaces the deprecated .ix indexer; the slice is purely positional.
    result = tpot_obj._fast_ica(training_testing_data.iloc[:, -3:], 1.0)
    expected = training_testing_data.iloc[:, -3:]

    assert np.array_equal(result, expected)
def test_feat_agg():
    """Assert that the TPOT FeatureAgglomeration preprocessor returns its input when there are no training features.

    Only the last three columns of ``training_testing_data`` are passed in, and
    the operator's output is expected to equal that same slice.
    """
    tpot_obj = TPOT()

    # .iloc replaces the deprecated .ix indexer; the slice is purely positional.
    result = tpot_obj._feat_agg(training_testing_data.iloc[:, -3:], 5, 1, 1)
    expected = training_testing_data.iloc[:, -3:]

    assert np.array_equal(result, expected)
def test_max_abs_scaler():
    """Ensure the TPOT max-abs scaler returns its input when there are no training features.

    Only the last three columns of ``training_testing_data`` are passed in, and
    the operator's output is expected to equal that same slice.
    """
    tpot_obj = TPOT()

    # .iloc replaces the deprecated .ix indexer; the slice is purely positional.
    result = tpot_obj._max_abs_scaler(training_testing_data.iloc[:, -3:])
    expected = training_testing_data.iloc[:, -3:]

    assert np.array_equal(result, expected)
def test_polynomial_features():
    """Ensure the TPOT polynomial-features operator returns its input when there are no training features.

    Only the last three columns of ``training_testing_data`` are passed in, and
    the operator's output is expected to equal that same slice.
    """
    tpot_obj = TPOT()

    # .iloc replaces the deprecated .ix indexer; the slice is purely positional.
    result = tpot_obj._polynomial_features(training_testing_data.iloc[:, -3:])
    expected = training_testing_data.iloc[:, -3:]

    assert np.array_equal(result, expected)
def test_binarizer():
    """Ensure the TPOT binarizer returns its input when there are no training features.

    Only the last three columns of ``training_testing_data`` are passed in, and
    the operator's output is expected to equal that same slice.
    """
    tpot_obj = TPOT()

    # .iloc replaces the deprecated .ix indexer; the slice is purely positional.
    result = tpot_obj._binarizer(training_testing_data.iloc[:, -3:], 0)
    expected = training_testing_data.iloc[:, -3:]

    assert np.array_equal(result, expected)
def test_rfe():
    """Ensure the TPOT RFE operator returns its input when there are no training features.

    Only the last three columns of ``training_testing_data`` are passed in, and
    the operator's output is expected to equal that same slice.
    """
    tpot_obj = TPOT()

    # .iloc replaces the deprecated .ix indexer; the slice is purely positional.
    result = tpot_obj._rfe(training_testing_data.iloc[:, -3:], 0, 0.1)
    expected = training_testing_data.iloc[:, -3:]

    assert np.array_equal(result, expected)
def test_gen():
    """Check the structure of a pipeline produced by TPOT's _gen_grow_safe.

    The generated pipeline must contain more than one node and its first node
    must have ``Output_DF`` as its return type.
    """
    generator = TPOT()
    generated = generator._gen_grow_safe(generator._pset, 1, 3)

    # Length is checked first so an empty result fails on the size assertion.
    assert len(generated) > 1
    first_node = generated[0]
    assert first_node.ret == Output_DF
def test_df_feature_selection():
    """Check the columns kept by TPOT's _dt_feature_selection when called with 10."""
    # NOTE(review): a second test_df_feature_selection is defined later in this
    # file; the later definition replaces this one when the module is loaded.
    tpot_obj = TPOT()
    expected_columns = [
        '00002', '00013', '00020', '00021', '00026',
        '00042', '00043', '00058', '00061',
        'class', 'group', 'guess',
    ]

    selected = tpot_obj._dt_feature_selection(training_testing_data, 10)
    assert np.array_equal(selected.columns.values, expected_columns)
def test_export():
    """Check that TPOT.export raises ValueError when no optimized pipeline exists."""
    # NOTE(review): another test_export is defined later in this file; the later
    # definition replaces this one when the module is loaded.
    unfitted = TPOT()
    try:
        unfitted.export("test_export.py")
    except ValueError:
        pass  # expected: nothing has been fitted yet
    else:
        assert False  # Should be unreachable
def test_train_model_and_predict():
    """Ensure _train_model_and_predict returns its input when given only three columns.

    Per the original test description, the last three columns of
    ``training_testing_data`` are class, group and guess, so no feature columns
    are passed to the model.
    """
    tpot_obj = TPOT()

    # .iloc replaces the deprecated .ix indexer; the slice is purely positional.
    expected = training_testing_data.iloc[:, -3:]
    result = tpot_obj._train_model_and_predict(training_testing_data.iloc[:, -3:], SVC)

    assert np.array_equal(expected, result)
def test_export():
    """Check that TPOT.export raises ValueError when no optimized pipeline exists."""
    # NOTE(review): this redefines an earlier test_export in the same file.
    unfitted = TPOT()
    try:
        unfitted.export('will_not_output')
    except ValueError:
        pass  # expected: nothing has been fitted yet
    else:
        assert False  # Should be unreachable
def test_predict():
    """Check that TPOT.predict raises ValueError when no optimized pipeline exists."""
    unfitted = TPOT()
    try:
        unfitted.predict(testing_features)
    except ValueError:
        pass  # expected: nothing has been fitted yet
    else:
        assert False  # Should be unreachable
def test_variance_threshold():
    """Check TPOT's _variance_threshold against sklearn's VarianceThreshold at threshold 0.

    The expected frame keeps the feature columns that VarianceThreshold selects
    on the training rows, followed by the class, group and guess columns.
    """
    tpot_obj = TPOT()
    meta_columns = ['class', 'group', 'guess']

    # Fit the reference selector on the training rows' feature columns only.
    is_training = training_testing_data['group'] == 'training'
    train_features = training_testing_data.loc[is_training].drop(meta_columns, axis=1)
    reference = VarianceThreshold(threshold=0)
    reference.fit(train_features)

    # get_support(True) yields integer positions of the retained columns.
    kept_positions = reference.get_support(True)
    kept_columns = list(train_features.iloc[:, kept_positions].columns)
    expected_columns = kept_columns + meta_columns

    assert np.array_equal(
        tpot_obj._variance_threshold(training_testing_data, 0),
        training_testing_data[expected_columns])
def test_score():
    """Check that TPOT.score raises ValueError when no optimized pipeline exists."""
    unfitted = TPOT()
    try:
        unfitted.score(testing_features, testing_classes)
    except ValueError:
        pass  # expected: nothing has been fitted yet
    else:
        assert False  # Should be unreachable
def test_passive_aggressive_2():
    """Compare TPOT's _passive_aggressive with sklearn's classifier when C == 0.0.

    TPOT is called with C=0.0 and is expected to predict the same testing-row
    guesses as a PassiveAggressiveClassifier built with C=0.0001.
    """
    tpot_obj = TPOT()
    tpot_output = tpot_obj._passive_aggressive(training_testing_data, 0.0, 0)
    testing_rows = tpot_output[tpot_output['group'] == 'testing']

    reference = PassiveAggressiveClassifier(
        C=0.0001, loss='hinge', fit_intercept=True, random_state=42)
    reference.fit(training_features, training_classes)

    tpot_guesses = testing_rows['guess'].values
    assert np.array_equal(tpot_guesses, reference.predict(testing_features))
def test_knnc_2():
    """Compare TPOT's _knnc with sklearn's KNeighborsClassifier when n_neighbors is 0.

    TPOT is called with 0 neighbors and is expected to predict the same
    testing-row guesses as a KNeighborsClassifier built with n_neighbors=2.
    """
    tpot_obj = TPOT()
    tpot_output = tpot_obj._knnc(training_testing_data, 0, 0)
    testing_rows = tpot_output[tpot_output['group'] == 'testing']

    reference = KNeighborsClassifier(n_neighbors=2, weights='uniform')
    reference.fit(training_features, training_classes)

    tpot_guesses = testing_rows['guess'].values
    assert np.array_equal(tpot_guesses, reference.predict(testing_features))
def test_multinomial_nb():
    """Compare TPOT's _multinomial_nb with sklearn's MultinomialNB (alpha=1.0)."""
    tpot_obj = TPOT()
    tpot_output = tpot_obj._multinomial_nb(training_testing_data, 1.0)
    testing_rows = tpot_output[tpot_output['group'] == 'testing']

    reference = MultinomialNB(alpha=1.0, fit_prior=True)
    reference.fit(training_features, training_classes)

    tpot_guesses = testing_rows['guess'].values
    assert np.array_equal(tpot_guesses, reference.predict(testing_features))
def test_random_forest_2():
    """Compare TPOT's _random_forest with sklearn's random forest when min_weight > 0.5.

    TPOT is called with 0.6 and is expected to predict the same testing-row
    guesses as a RandomForestClassifier built with min_weight_fraction_leaf=0.5.
    """
    tpot_obj = TPOT()
    tpot_output = tpot_obj._random_forest(training_testing_data, 0.6)
    testing_rows = tpot_output[tpot_output['group'] == 'testing']

    reference = RandomForestClassifier(
        n_estimators=500,
        min_weight_fraction_leaf=0.5,
        random_state=42,
        n_jobs=-1)
    reference.fit(training_features, training_classes)

    tpot_guesses = testing_rows['guess'].values
    assert np.array_equal(tpot_guesses, reference.predict(testing_features))
def test_bernoulli_nb():
    """Compare TPOT's _bernoulli_nb with sklearn's BernoulliNB (alpha=1.0, binarize=0.0)."""
    tpot_obj = TPOT()
    tpot_output = tpot_obj._bernoulli_nb(training_testing_data, 1.0, 0.0)
    testing_rows = tpot_output[tpot_output['group'] == 'testing']

    reference = BernoulliNB(alpha=1.0, binarize=0.0, fit_prior=True)
    reference.fit(training_features, training_classes)

    tpot_guesses = testing_rows['guess'].values
    assert np.array_equal(tpot_guesses, reference.predict(testing_features))
def test_extra_trees_3():
    """Compare TPOT's _extra_trees with sklearn's ExtraTreesClassifier when min_weight > 0.5.

    TPOT is called with a min-weight of 0.6 and is expected to predict the same
    testing-row guesses as an ExtraTreesClassifier built with
    min_weight_fraction_leaf=0.5.
    """
    tpot_obj = TPOT()
    tpot_output = tpot_obj._extra_trees(training_testing_data, 0, 1., 0.6)
    testing_rows = tpot_output[tpot_output['group'] == 'testing']

    reference = ExtraTreesClassifier(
        n_estimators=500,
        random_state=42,
        max_features=1.,
        min_weight_fraction_leaf=0.5,
        criterion='gini')
    reference.fit(training_features, training_classes)

    tpot_guesses = testing_rows['guess'].values
    assert np.array_equal(tpot_guesses, reference.predict(testing_features))
def test_xgboost():
    """Compare TPOT's _xgradient_boosting with the xgboost classifier.

    TPOT is called with learning_rate=0 and is expected to predict the same
    testing-row guesses as an XGBClassifier built with learning_rate=0.0001.
    """
    tpot_obj = TPOT()
    tpot_output = tpot_obj._xgradient_boosting(
        training_testing_data, n_estimators=100, learning_rate=0, max_depth=3)
    testing_rows = tpot_output[tpot_output['group'] == 'testing']

    reference = XGBClassifier(
        n_estimators=100, learning_rate=0.0001, max_depth=3, seed=42)
    reference.fit(training_features, training_classes)

    tpot_guesses = testing_rows['guess'].values
    assert np.array_equal(tpot_guesses, reference.predict(testing_features))
def test_random_forest():
    """Compare TPOT's _random_forest with sklearn's RandomForestClassifier (100 estimators)."""
    tpot_obj = TPOT()
    tpot_output = tpot_obj._random_forest(training_testing_data, 100, 0)
    testing_rows = tpot_output[tpot_output['group'] == 'testing']

    reference = RandomForestClassifier(
        n_estimators=100, max_features='auto', random_state=42, n_jobs=-1)
    reference.fit(training_features, training_classes)

    tpot_guesses = testing_rows['guess'].values
    assert np.array_equal(tpot_guesses, reference.predict(testing_features))
def test_decision_tree():
    """Compare TPOT's _decision_tree with sklearn's DecisionTreeClassifier."""
    tpot_obj = TPOT()
    tpot_output = tpot_obj._decision_tree(training_testing_data, 0, 0)
    testing_rows = tpot_output[tpot_output['group'] == 'testing']

    reference = DecisionTreeClassifier(
        max_features='auto', max_depth=None, random_state=42)
    reference.fit(training_features, training_classes)

    tpot_guesses = testing_rows['guess'].values
    assert np.array_equal(tpot_guesses, reference.predict(testing_features))
def test_gaussian_nb():
    """Compare TPOT's _gaussian_nb with sklearn's GaussianNB."""
    tpot_obj = TPOT()
    tpot_output = tpot_obj._gaussian_nb(training_testing_data)
    testing_rows = tpot_output[tpot_output['group'] == 'testing']

    reference = GaussianNB()
    reference.fit(training_features, training_classes)

    tpot_guesses = testing_rows['guess'].values
    assert np.array_equal(tpot_guesses, reference.predict(testing_features))
def test_ada_boost_2():
    """Compare TPOT's _ada_boost with sklearn's AdaBoostClassifier when learning_rate == 0.0.

    TPOT is called with 0.0 and is expected to predict the same testing-row
    guesses as an AdaBoostClassifier built with learning_rate=0.0001.
    """
    tpot_obj = TPOT()
    tpot_output = tpot_obj._ada_boost(training_testing_data, 0.0)
    testing_rows = tpot_output[tpot_output['group'] == 'testing']

    reference = AdaBoostClassifier(
        n_estimators=500, random_state=42, learning_rate=0.0001)
    reference.fit(training_features, training_classes)

    tpot_guesses = testing_rows['guess'].values
    assert np.array_equal(tpot_guesses, reference.predict(testing_features))
def test_logistic_regression():
    """Compare TPOT's _logistic_regression with sklearn's LogisticRegression (C=5, l1 penalty)."""
    tpot_obj = TPOT()
    tpot_output = tpot_obj._logistic_regression(training_testing_data, 5., 0, True)
    testing_rows = tpot_output[tpot_output['group'] == 'testing']

    reference = LogisticRegression(C=5., penalty='l1', dual=False, random_state=42)
    reference.fit(training_features, training_classes)

    tpot_guesses = testing_rows['guess'].values
    assert np.array_equal(tpot_guesses, reference.predict(testing_features))
def test_svc_2():
    """Compare TPOT's _svc with sklearn's SVC when C < 0.0001.

    TPOT is called with C=0.00001 and is expected to predict the same
    testing-row guesses as an SVC built with C=0.0001.
    """
    tpot_obj = TPOT()
    tpot_output = tpot_obj._svc(training_testing_data, 0.00001)
    testing_rows = tpot_output[tpot_output['group'] == 'testing']

    reference = SVC(C=0.0001, random_state=42)
    reference.fit(training_features, training_classes)

    tpot_guesses = testing_rows['guess'].values
    assert np.array_equal(tpot_guesses, reference.predict(testing_features))
def test_linear_svc_2():
    """Compare TPOT's _linear_svc with sklearn's LinearSVC when C == 0.0.

    TPOT is called with C=0.0 and is expected to predict the same testing-row
    guesses as a LinearSVC built with C=0.0001.
    """
    tpot_obj = TPOT()
    tpot_output = tpot_obj._linear_svc(training_testing_data, 0.0, 0, True)
    testing_rows = tpot_output[tpot_output['group'] == 'testing']

    reference = LinearSVC(C=0.0001, penalty='l1', dual=False, random_state=42)
    reference.fit(training_features, training_classes)

    tpot_guesses = testing_rows['guess'].values
    assert np.array_equal(tpot_guesses, reference.predict(testing_features))
def test_decision_tree_3():
    """Compare TPOT's _decision_tree with sklearn's decision tree when min_weight > 0.5.

    TPOT is called with 0.6 and is expected to predict the same testing-row
    guesses as a DecisionTreeClassifier built with min_weight_fraction_leaf=0.5.
    """
    tpot_obj = TPOT()
    tpot_output = tpot_obj._decision_tree(training_testing_data, 0.6)
    testing_rows = tpot_output[tpot_output['group'] == 'testing']

    reference = DecisionTreeClassifier(min_weight_fraction_leaf=0.5, random_state=42)
    reference.fit(training_features, training_classes)

    tpot_guesses = testing_rows['guess'].values
    assert np.array_equal(tpot_guesses, reference.predict(testing_features))
def test_df_feature_selection():
    """Check the columns kept by TPOT's _dt_feature_selection when called with 10.

    The operator is accessed through its underscore-prefixed name, matching how
    every other TPOT operator is called in this test module.
    """
    # NOTE(review): this redefines an earlier test_df_feature_selection in the
    # same file; only this later definition is visible once the module loads.
    tpot_obj = TPOT()
    expected_columns = [
        '00002', '00013', '00020', '00021', '00026',
        '00042', '00043', '00058', '00061',
        'class', 'group', 'guess',
    ]

    selected = tpot_obj._dt_feature_selection(training_testing_data, 10)
    assert np.array_equal(selected.columns.values, expected_columns)
def test_combine_dfs_2():
    """Check that _combine_dfs on two equal dataframes yields a frame equal to either input."""
    def build_frame():
        # Each call builds an independent frame with identical contents.
        return pd.DataFrame({'a': range(10), 'b': range(10, 20)})

    tpot_obj = TPOT()
    left = build_frame()
    right = build_frame()
    expected = build_frame()

    merged = tpot_obj._combine_dfs(left, right)
    assert merged.equals(expected)
def test_gradient_boosting_2():
    """Compare TPOT's _gradient_boosting with sklearn's classifier when max_depth < 1.

    TPOT is called with a max depth of 0 and is expected to predict the same
    testing-row guesses as a GradientBoostingClassifier built with max_depth=1.
    """
    tpot_obj = TPOT()
    tpot_output = tpot_obj._gradient_boosting(training_testing_data, 1.0, 0)
    testing_rows = tpot_output[tpot_output['group'] == 'testing']

    reference = GradientBoostingClassifier(
        learning_rate=1.0, max_depth=1, n_estimators=500, random_state=42)
    reference.fit(training_features, training_classes)

    tpot_guesses = testing_rows['guess'].values
    assert np.array_equal(tpot_guesses, reference.predict(testing_features))
def test_linear_svc():
    """Compare TPOT's _linear_svc with sklearn's LinearSVC (C=1.0, hinge loss)."""
    tpot_obj = TPOT()
    tpot_output = tpot_obj._linear_svc(training_testing_data, 1.0, 0, 0)
    testing_rows = tpot_output[tpot_output['group'] == 'testing']

    reference = LinearSVC(C=1.0, loss='hinge', fit_intercept=True, random_state=42)
    reference.fit(training_features, training_classes)

    tpot_guesses = testing_rows['guess'].values
    assert np.array_equal(tpot_guesses, reference.predict(testing_features))
def test_predict_2():
    """Check that TPOT.predict returns a result of shape (num_testing_rows,).

    The training data and a hand-written logistic-regression pipeline are set
    directly on the object instead of calling ``fit``.
    """
    tpot_obj = TPOT()
    tpot_obj._training_classes = training_classes
    tpot_obj._training_features = training_features

    pipeline_source = '_logistic_regression(input_df, 1.0, 0, True)'
    tpot_obj._optimized_pipeline = creator.Individual.from_string(
        pipeline_source, tpot_obj._pset)

    predictions = tpot_obj.predict(testing_features)
    expected_shape = (testing_features.shape[0],)
    assert predictions.shape == expected_shape