예제 #1
0
    def test_score_regressor(self):
        """Score a single-threaded FastTreesRegressor pipeline on iris.

        The ``Label`` column is collapsed to 1/0 (1 where it equals 1), the
        frame is split with the global NumPy seed fixed at 0, and the value
        returned by ``Pipeline.score`` is compared with a pinned number
        (described as the L1 loss in the failure message).
        """
        expected = 0.814061733686017

        # Seed first: the split below is not given its own random state.
        np.random.seed(0)
        iris = get_dataset("iris").as_df()
        iris.drop(['Species'], axis=1, inplace=True)
        iris['Label'] = [int(value == 1) for value in iris['Label']]

        features = iris.loc[:, iris.columns != 'Label']
        target = iris['Label']
        (X_train, X_test,
         y_train, y_test) = train_test_split(features, target)

        pipeline = Pipeline([FastTreesRegressor(train_threads=1)])
        pipeline.fit(X_train, y_train.to_frame())

        metrics = pipeline.score(X_test, y_test)
        print(metrics)
        assert_almost_equal(
            metrics,
            expected,
            decimal=5,
            err_msg="L1 loss should be %s" % expected)
예제 #2
0
    def test_score_binary(self):
        """Score a single-threaded logistic-regression binary pipeline.

        Uses the iris frame with ``Label`` collapsed to 1/0 (1 where it
        equals 1) and the global NumPy seed fixed at 0. The value returned
        by ``Pipeline.score`` is compared with a pinned number (described
        as the AUC in the failure message).
        """
        expected = 0.9801136363636364

        # Seed first: the split below is not given its own random state.
        np.random.seed(0)
        iris = get_dataset("iris").as_df()
        iris.drop(['Species'], axis=1, inplace=True)
        iris['Label'] = [int(value == 1) for value in iris['Label']]

        features = iris.loc[:, iris.columns != 'Label']
        target = iris['Label']
        (X_train, X_test,
         y_train, y_test) = train_test_split(features, target)

        classifier = LogisticRegressionBinaryClassifier(train_threads=1)
        pipeline = Pipeline([classifier])
        # Unlike the sibling tests, the label is passed as a Series here.
        pipeline.fit(X_train, y_train)

        metrics = pipeline.score(X_test, y_test)
        print(metrics)
        assert_almost_equal(
            metrics,
            expected,
            decimal=5,
            err_msg="AUC should be %s" % expected)
예제 #3
0
    def test_score_multiclass(self):
        """Score a single-threaded LogisticRegressionClassifier pipeline.

        Uses the iris frame with ``Label`` collapsed to 1/0 (1 where it
        equals 1) and the global NumPy seed fixed at 0. The value returned
        by ``Pipeline.score`` is compared with a pinned number (described
        as the micro-averaged accuracy in the failure message).
        """
        expected = 0.7631578947368421

        # Seed first: the split below is not given its own random state.
        np.random.seed(0)
        iris = get_dataset("iris").as_df()
        iris.drop(['Species'], axis=1, inplace=True)
        iris['Label'] = [int(value == 1) for value in iris['Label']]

        features = iris.loc[:, iris.columns != 'Label']
        target = iris['Label']
        (X_train, X_test,
         y_train, y_test) = train_test_split(features, target)

        classifier = LogisticRegressionClassifier(train_threads=1)
        pipeline = Pipeline([classifier])
        pipeline.fit(X_train, y_train.to_frame())

        metrics = pipeline.score(X_test, y_test)
        print(metrics)
        assert_almost_equal(
            metrics,
            expected,
            decimal=5,
            err_msg="Accuracy(micro-avg) should be %s" % expected)
예제 #4
0
    def test_score_clusterer(self):
        """Score a two-cluster KMeansPlusPlus pipeline on iris.

        Uses the iris frame with ``Label`` collapsed to 1/0 (1 where it
        equals 1) and the global NumPy seed fixed at 0. The clusterer is
        built with random initialisation and a single training thread. The
        value returned by ``Pipeline.score`` is compared with a pinned
        number (described as the NMI loss in the failure message).
        """
        expected = 0.36840763005544264

        # Seed first: the split below is not given its own random state.
        np.random.seed(0)
        iris = get_dataset("iris").as_df()
        iris.drop(['Species'], axis=1, inplace=True)
        iris['Label'] = [int(value == 1) for value in iris['Label']]

        features = iris.loc[:, iris.columns != 'Label']
        target = iris['Label']
        (X_train, X_test,
         y_train, y_test) = train_test_split(features, target)

        clusterer = KMeansPlusPlus(
            n_clusters=2,
            init_algorithm="Random",
            train_threads=1)
        pipeline = Pipeline([clusterer])
        pipeline.fit(X_train, y_train.to_frame())

        metrics = pipeline.score(X_test, y_test)
        print(metrics)
        assert_almost_equal(
            metrics,
            expected,
            decimal=5,
            err_msg="NMI loss should be %s" % expected)
예제 #5
0
 def test_score_anomalydetection(self):
     """Score a OneClassSvmAnomalyDetector pipeline trained on Setosa rows.

     The iris frame (without ``Label`` and ``Species``) is split with the
     global NumPy seed fixed at 0. Training keeps only rows where
     ``Setosa == 1``; the test label is 1 where ``Setosa == 0`` and 0
     otherwise. The ``Setosa`` column is removed from both feature frames
     before fitting. The value returned by ``Pipeline.score`` is compared
     with 1.0 (described as the AUC in the failure message).

     Raises:
         ValueError: if the fitted final node reports a label column even
             though the pipeline was fitted without labels.
     """
     # Seed first: the split below is not given its own random state.
     np.random.seed(0)
     df = get_dataset("iris").as_df().drop(['Label', 'Species'], axis=1)
     X_train, X_test = train_test_split(df)
     X_train = X_train[X_train['Setosa'] == 1]
     y_test = X_test['Setosa'].apply(lambda x: 1 if x == 0 else 0)
     # Rebind to the frames returned by drop() instead of dropping in
     # place on frames derived from another frame. This removes the need
     # for the old ``X_test.is_copy = False`` workaround, which relied on
     # a pandas attribute that has since been deprecated and removed.
     X_train = X_train.drop(['Setosa'], axis=1)
     X_test = X_test.drop(['Setosa'], axis=1)
     svm = OneClassSvmAnomalyDetector()  # noqa
     e = Pipeline([svm])
     e.fit(X_train)
     # Fitting without y must leave the last node with no label column.
     if e.nodes[-1].label_column_ is not None:
         raise ValueError("'{0}' should be None".format(
             e.nodes[-1].label_column_))
     # The derived label Series still carries its source column name.
     assert y_test.name == 'Setosa'
     metrics = e.score(X_test, y_test)
     print(metrics)
     assert_almost_equal(metrics,
                         1.0,
                         decimal=5,
                         err_msg="AUC should be %s" % 1.0)