コード例 #1
0
    def test_pickle_pipeline_and_nimbusml_pipeline(self):
        # Fit an sklearn Pipeline whose single step is a nimbusml pipeline,
        # pickle it, unpickle it, and verify that the restored pipeline
        # predicts exactly what the original one did.
        loader_kwargs = dict(sep=',', features=selected_features)
        X_train, y_train = get_X_y(train_file, label_column, **loader_kwargs)
        X_test, y_test = get_X_y(test_file, label_column, **loader_kwargs)

        # Sanity check on the loaded training frame before fitting.
        if 'F1' in X_train.columns:
            raise Exception("F1 is in the dataset")

        inner_pipe = nimbusmlPipeline([
            OneHotVectorizer() << 'age',
            FastTreesBinaryClassifier(),
        ])
        skpipe = Pipeline(steps=[('nimbusml', inner_pipe)])
        skpipe.fit(X_train, y_train)

        # Baseline predictions and accuracy from the freshly fitted pipeline.
        scores = skpipe.predict(X_test)
        true_labels = y_test.values.ravel()
        accu1 = np.mean(true_labels == scores["PredictedLabel"].values)

        # Round-trip through pickle and score again; the accuracy and the
        # full prediction frame must both match the baseline.
        restored = pickle.loads(pickle.dumps(skpipe))
        scores2 = restored.predict(X_test)
        accu2 = np.mean(true_labels == scores2["PredictedLabel"].values)

        assert_equal(
            accu1, accu2, "accuracy mismatch after unpickling pipeline")
        assert_frame_equal(scores, scores2)
コード例 #2
0
    def test_pipeline_clone(self):
        # Fit an sklearn Pipeline wrapping a nimbusml pipeline, then check
        # that (a) a clone of it and (b) the same pipeline after a second
        # fit both reproduce the original predictions.
        loader_kwargs = dict(sep=',', features=selected_features)
        X_train, y_train = get_X_y(train_file, label_column, **loader_kwargs)
        X_test, y_test = get_X_y(test_file, label_column, **loader_kwargs)

        # Sanity check on the loaded training frame before fitting.
        if 'F1' in X_train.columns:
            raise Exception("F1 is in the dataset")

        inner_pipe = nimbusmlPipeline([
            OneHotVectorizer() << 'age',
            FastTreesBinaryClassifier(),
        ])
        skpipe = Pipeline(steps=[('nimbusml', inner_pipe)])
        skpipe.fit(X_train, y_train)
        scores = skpipe.predict(X_test)

        # Predictions from the clone must equal the original predictions.
        cloned_pipe = clone(skpipe)
        assert_frame_equal(scores, cloned_pipe.predict(X_test))

        # checks we can fit again
        skpipe.fit(X_train, y_train)
        assert_frame_equal(scores, skpipe.predict(X_test))
コード例 #3
0
 def test_lr_named_steps_iris(self):
     # Build a nimbusml pipeline from (name, step) tuples, fit it on a
     # two-feature iris frame, and check that predict() output can be
     # truncated to five rows with head().
     iris = load_iris()
     # we only take the first two features.
     df = pd.DataFrame(iris.data[:, :2], columns=['X1', 'X2'])
     df['Label'] = iris.target

     scaler = MeanVarianceScaler() << ['X1', 'X2']
     classifier = LogisticRegressionClassifier() << ['X1', 'X2']
     pipe = nimbusmlPipeline([('norm', scaler), ('lr', classifier)])
     pipe.fit(df)

     first_rows = pipe.predict(df).head()
     assert len(first_rows) == 5
コード例 #4
0
ファイル: test_errors.py プロジェクト: zyw400/NimbusML-1
    def test_char_tokenizer(self):
        # Run a ColumnConcatenator followed by a CharTokenizer over a small
        # frame of review strings and check that fit_transform returns
        # something other than None.
        review_texts = [
            "I really did not like the taste of it",
            "It was surprisingly quite good!",
            "I will never ever ever go to that place again!!",
            "The best ever!! It was amazingly good and super fast",
            "I wish I had gone earlier, it was that great",
            "somewhat dissapointing. I'd probably wont try again",
            "Never visit again... rascals!",
        ]
        customer_reviews = pd.DataFrame(data={'review': review_texts})

        tokenize = CharTokenizer(['review'])
        # `>>` and `<<` share precedence and associate left to right, so the
        # parentheses below only make the original grouping explicit.
        concat = (ColumnConcatenator() >> 'features') << [['review']]

        pipeline = nimbusmlPipeline([concat, tokenize])
        transformed = pipeline.fit_transform(customer_reviews)
        assert transformed is not None
コード例 #5
0
    def test_pipeline_get_params(self):
        # Fit an sklearn Pipeline wrapping a nimbusml pipeline and check that
        # get_params(deep=True) exposes the step list plus the nested
        # 'nimbusml' entries.
        loader_kwargs = dict(sep=',', features=selected_features)
        X_train, y_train = get_X_y(train_file, label_column, **loader_kwargs)
        # The test split is loaded as well, although nothing below reads it.
        X_test, y_test = get_X_y(test_file, label_column, **loader_kwargs)

        # Sanity check on the loaded training frame before fitting.
        if 'F1' in X_train.columns:
            raise Exception("F1 is in the dataset")

        inner_pipe = nimbusmlPipeline([
            OneHotVectorizer() << 'age',
            FastTreesBinaryClassifier(),
        ])
        skpipe = Pipeline(steps=[('nimbusml', inner_pipe)])
        skpipe.fit(X_train, y_train)

        pars = skpipe.get_params(deep=True)
        assert 'steps' in pars
        # Each entry in 'steps' is expected to have exactly two elements.
        first_step = pars['steps'][0]
        assert len(first_step) == 2
        # Checked in the same order as before so the first failure is the same.
        for expected_key in ('nimbusml',
                             'nimbusml__random_state',
                             'nimbusml__steps'):
            assert expected_key in pars