Ejemplo n.º 1
0
    def testAccuracyScore(self):
        """Compare ``accuracy_score`` run via ``self.executor`` with scikit-learn.

        Every case builds a score with ``accuracy_score``, executes it with
        ``self.executor.execute_tileables`` and checks the value against
        ``sklearn_accuracy_score`` called with the same arguments.
        """

        def run(tileable):
            # Execute one tileable and unwrap its single result.
            return self.executor.execute_tileables([tileable])[0]

        # 1-D label lists: default call, then with normalize=False.
        truth = [0, 1, 2, 3]
        guess = [0, 2, 1, 3]
        for options in ({}, {'normalize': False}):
            actual = run(accuracy_score(truth, guess, **options))
            reference = sklearn_accuracy_score(truth, guess, **options)
            self.assertAlmostEqual(actual, reference)

        # 2-D label arrays: default call, then with per-sample weights.
        guess = np.array([[0, 1], [1, 1]])
        truth = np.ones((2, 2))
        weights = [0.7, 0.3]
        for options in ({}, {'sample_weight': weights}):
            actual = run(accuracy_score(truth, guess, **options))
            reference = sklearn_accuracy_score(truth, guess, **options)
            self.assertAlmostEqual(actual, reference)

        # Same data wrapped with ``mt.tensor``, weighted and un-normalized.
        weighted_count = accuracy_score(mt.tensor(truth), mt.tensor(guess),
                                        sample_weight=mt.tensor(weights),
                                        normalize=False)
        actual = run(weighted_count)
        reference = sklearn_accuracy_score(truth, guess, sample_weight=weights,
                                           normalize=False)
        self.assertAlmostEqual(actual, reference)
Ejemplo n.º 2
0
def test_accuracy_score(setup):
    """Compare executed ``accuracy_score`` results with scikit-learn.

    Every case builds a score with ``accuracy_score``, materializes it with
    ``.execute().fetch()`` and checks the value against
    ``sklearn_accuracy_score`` called with the same arguments. The ``setup``
    argument is requested but not used directly in the body.
    """
    # 1-D label lists: default call, then with normalize=False.
    truth = [0, 1, 2, 3]
    guess = [0, 2, 1, 3]
    for options in ({}, {'normalize': False}):
        actual = accuracy_score(truth, guess, **options).execute().fetch()
        reference = sklearn_accuracy_score(truth, guess, **options)
        assert pytest.approx(actual) == reference

    # 2-D label arrays: default call, then with per-sample weights.
    guess = np.array([[0, 1], [1, 1]])
    truth = np.ones((2, 2))
    weights = [0.7, 0.3]
    for options in ({}, {'sample_weight': weights}):
        actual = accuracy_score(truth, guess, **options).execute().fetch()
        reference = sklearn_accuracy_score(truth, guess, **options)
        assert pytest.approx(actual) == reference

    # Same data wrapped with ``mt.tensor``, weighted and un-normalized.
    weighted_count = accuracy_score(mt.tensor(truth), mt.tensor(guess),
                                    sample_weight=mt.tensor(weights),
                                    normalize=False)
    actual = weighted_count.execute().fetch()
    reference = sklearn_accuracy_score(truth, guess, sample_weight=weights,
                                       normalize=False)
    assert pytest.approx(actual) == reference
Ejemplo n.º 3
0
    def evaluate_test_perf(self, trained_model, X_test, y_test, threshold=0.5):
        """Score ``trained_model`` on the test split and return its accuracy.

        The whole evaluation runs inside ``PerfTimer(self, 'score_timer')``.

        Parameters
        ----------
        trained_model
            Fitted model. Queried through ``xgboost.dask.predict`` when
            ``self.model_type`` contains ``'XGBoost'``, or through its own
            ``predict`` method when it contains ``'RandomForest'``.
        X_test, y_test
            Test features and labels. ``y_test`` must provide ``.compute()``
            when ``self.compute_type`` contains ``'multi'``.
        threshold : float
            XGBoost outputs greater than or equal to this value become
            label 1, all others label 0. Unused for RandomForest.

        Returns
        -------
        The value returned by ``cuml_accuracy_score`` (``'GPU'`` in
        ``self.compute_type``) or ``sklearn_accuracy_score`` (``'CPU'``).

        Raises
        ------
        ValueError
            If ``self.model_type`` or ``self.compute_type`` matches none of
            the handled cases. Previously this fell through and failed with
            an unbound local variable.
        """
        with PerfTimer(self, 'score_timer'):

            if 'XGBoost' in self.model_type:
                dtest = xgboost.dask.DaskDMatrix(self.client, X_test, y_test)
                predictions = xgboost.dask.predict(self.client, trained_model,
                                                   dtest).compute()
                # threshold returned probabilities into 0/1 labels
                predictions = np.where(predictions >= threshold, 1, 0)

            elif 'RandomForest' in self.model_type:
                predictions = trained_model.predict(X_test)
                # Every compute type except 'multi-CPU' gets its predictions
                # materialized with .compute().
                if 'multi-CPU' not in self.compute_type:
                    predictions = predictions.compute()

            else:
                raise ValueError(
                    f'unsupported model_type: {self.model_type!r}')

            if 'multi' in self.compute_type:
                y_test = y_test.compute()

            if 'GPU' in self.compute_type:
                test_accuracy = cuml_accuracy_score(y_test, predictions)
            elif 'CPU' in self.compute_type:
                test_accuracy = sklearn_accuracy_score(y_test, predictions)
            else:
                raise ValueError(
                    f'unsupported compute_type: {self.compute_type!r}')

        return test_accuracy
    def predict(self, trained_model, X_test, y_test, threshold=0.5):
        """Run inference on the unseen test data and record the accuracy.

        The work runs inside a ``PerfTimer`` context. The resulting score is
        printed, appended to ``self.cv_fold_scores`` and returned.

        Parameters
        ----------
        trained_model
            Fitted model matching ``self.model_type`` (``'XGBoost'`` or
            ``'RandomForest'``).
        X_test, y_test
            Test features and labels. In the ``'multi'`` branches that call
            it, ``y_test`` must provide ``.compute()``.
        threshold : float
            XGBoost outputs strictly greater than this value become 1.0,
            all others 0.0. Unused for RandomForest.

        Returns
        -------
        The accuracy from ``cuml_accuracy_score``, ``sklearn_accuracy_score``
        or ``trained_model.score``, depending on the configuration.

        Raises
        ------
        ValueError
            If ``self.model_type`` or ``self.compute_type`` matches none of
            the handled cases. Previously this fell through and failed with
            an unbound local variable.
        """
        with PerfTimer(f'predict [ {self.model_type} ]'):

            if 'XGBoost' in self.model_type:
                if 'single' in self.compute_type:
                    dtest = xgboost.DMatrix(X_test, y_test)
                    predictions = trained_model.predict(dtest)
                    predictions = (predictions > threshold) * 1.0

                elif 'multi' in self.compute_type:
                    dtest = xgboost.dask.DaskDMatrix(self.client, X_test,
                                                     y_test)
                    predictions = xgboost.dask.predict(self.client,
                                                       trained_model,
                                                       dtest).compute()
                    predictions = (predictions > threshold) * 1.0
                    y_test = y_test.compute()

                else:
                    raise ValueError(
                        f'unsupported compute_type: {self.compute_type!r}')

                if 'GPU' in self.compute_type:
                    test_accuracy = cuml_accuracy_score(y_test, predictions)
                elif 'CPU' in self.compute_type:
                    test_accuracy = sklearn_accuracy_score(y_test, predictions)
                else:
                    raise ValueError(
                        f'unsupported compute_type: {self.compute_type!r}')

            elif 'RandomForest' in self.model_type:
                if 'single' in self.compute_type:
                    test_accuracy = trained_model.score(X_test, y_test)

                elif 'multi' in self.compute_type:

                    if 'GPU' in self.compute_type:
                        y_test = y_test.compute()
                        predictions = trained_model.predict(X_test).compute()
                        test_accuracy = cuml_accuracy_score(
                            y_test, predictions)

                    elif 'CPU' in self.compute_type:
                        test_accuracy = sklearn_accuracy_score(
                            y_test, trained_model.predict(X_test))

                    else:
                        raise ValueError(
                            f'unsupported compute_type: '
                            f'{self.compute_type!r}')

                else:
                    raise ValueError(
                        f'unsupported compute_type: {self.compute_type!r}')

            else:
                raise ValueError(
                    f'unsupported model_type: {self.model_type!r}')

            print(f'    subfold score: {test_accuracy}\n')
            self.cv_fold_scores += [test_accuracy]
            return test_accuracy
Ejemplo n.º 5
0
def test_dataframe_accuracy_score(setup):
    """Check ``accuracy_score`` on DataFrame-derived inputs against scikit-learn.

    Two seeded random integer columns are wrapped in an ``md.DataFrame``.
    Column ``'a'`` is converted with ``to_tensor()`` and column ``'b'`` is
    passed as a column, both cast to ``'int'``. The fetched score must
    approximately equal ``sklearn_accuracy_score`` on the raw NumPy columns.
    The ``setup`` argument is requested but not used directly in the body.
    """
    rng = np.random.RandomState(0)
    # Draw 'a' before 'b' so the seeded sequence matches the column order.
    col_a = rng.randint(0, 10, (10,))
    col_b = rng.randint(0, 10, (10,))
    frame = pd.DataFrame({'a': col_a, 'b': col_b})

    wrapped = md.DataFrame(frame)
    truth = wrapped['a'].to_tensor().astype('int')
    guess = wrapped['b'].astype('int')
    result = accuracy_score(truth, guess)

    raw_truth = frame['a'].to_numpy().astype('int')
    raw_guess = frame['b'].to_numpy().astype('int')
    reference = sklearn_accuracy_score(raw_truth, raw_guess)

    assert pytest.approx(result.fetch()) == reference
Ejemplo n.º 6
0
    def predict(self, trained_model, X_test, y_test, threshold=0.5):
        """Run inference on the test data and record the accuracy.

        The work runs inside a ``PerfTimer`` context. The resulting score is
        appended to ``self.scores`` and returned.

        Parameters
        ----------
        trained_model
            Fitted model matching ``self.model_type`` (``'XGBoost'`` or
            ``'RandomForest'``).
        X_test, y_test
            Test features and labels. In the ``'multi'`` branches that call
            it, ``y_test`` must provide ``.compute()``.
        threshold : float
            XGBoost outputs strictly greater than this value become 1.0,
            all others 0.0. Unused for RandomForest.

        Returns
        -------
        The accuracy from ``cuml_accuracy_score``, ``sklearn_accuracy_score``
        or ``trained_model.score``, depending on the configuration.

        Raises
        ------
        ValueError
            If ``self.model_type`` or ``self.compute_type`` matches none of
            the handled cases. Previously this fell through and failed with
            an unbound local variable.
        """
        with PerfTimer(f'predict [ {self.model_type} ]'):

            if 'XGBoost' in self.model_type:
                if 'single' in self.compute_type:
                    dtest = xgboost.DMatrix(X_test, y_test)
                    predictions = trained_model.predict(dtest)
                    predictions = (predictions > threshold) * 1.0

                elif 'multi' in self.compute_type:
                    dtest = xgboost.dask.DaskDMatrix(self.client, X_test,
                                                     y_test)
                    predictions = xgboost.dask.predict(self.client,
                                                       trained_model,
                                                       dtest).compute()
                    predictions = (predictions > threshold) * 1.0
                    y_test = y_test.compute()

                else:
                    raise ValueError(
                        f'unsupported compute_type: {self.compute_type!r}')

                if 'GPU' in self.compute_type:
                    test_accuracy = cuml_accuracy_score(y_test, predictions)
                elif 'CPU' in self.compute_type:
                    test_accuracy = sklearn_accuracy_score(y_test, predictions)
                else:
                    raise ValueError(
                        f'unsupported compute_type: {self.compute_type!r}')

            elif 'RandomForest' in self.model_type:
                if 'single' in self.compute_type:
                    test_accuracy = trained_model.score(X_test, y_test)

                elif 'multi' in self.compute_type:

                    if 'GPU' in self.compute_type:
                        y_test = y_test.compute()
                        predictions = trained_model.predict(X_test).compute()
                        test_accuracy = cuml_accuracy_score(
                            y_test, predictions)

                    elif 'CPU' in self.compute_type:
                        test_accuracy = sklearn_accuracy_score(
                            y_test, trained_model.predict(X_test))

                    else:
                        raise ValueError(
                            f'unsupported compute_type: '
                            f'{self.compute_type!r}')

                else:
                    raise ValueError(
                        f'unsupported compute_type: {self.compute_type!r}')

            else:
                raise ValueError(
                    f'unsupported model_type: {self.model_type!r}')

            # accumulate internal list
            self.scores += [test_accuracy]
            return test_accuracy
Ejemplo n.º 7
0
def accuracy_score(y_pred, y_true, title):
    """Report the confusion matrix for a prediction run and return accuracy.

    The confusion matrix is printed, written to ``<util.home_dir>/<title>.csv``,
    plotted, and the plot is appended to ``confusion_matrices_pdf``.

    Note the argument order: predictions come first and true labels second,
    the reverse of ``sklearn_accuracy_score``.

    Parameters
    ----------
    y_pred : array-like
        Predicted labels.
    y_true : array-like
        True labels. When they contain more than two distinct values the
        plot uses ``util.newsgroups`` as class names; otherwise it uses
        ``['Liberal', 'Conservative']``.
    title : str
        Used as the printed prefix, the CSV file stem and the plot title.

    Returns
    -------
    The value of ``sklearn_accuracy_score(y_true, y_pred)``.
    """
    # plot confusion matrix for classes
    cnf_matrix = confusion_matrix(y_true, y_pred)
    print(title, cnf_matrix)
    np.savetxt(util.home_dir + '/' + title + '.csv', cnf_matrix, delimiter=',')
    # Global NumPy print setting; set after the print above, so it only
    # affects later printing.
    np.set_printoptions(precision=2)

    fig = plt.figure()
    try:
        if np.unique(y_true).size > 2:
            plot_confusion_matrix(cnf_matrix, classes=util.newsgroups,
                                  title=title)
        else:
            plot_confusion_matrix(cnf_matrix,
                                  classes=['Liberal', 'Conservative'],
                                  title=title)
        drawn = plt.gcf()
        confusion_matrices_pdf.savefig(drawn)
        # The plotting helper may have switched to a figure of its own;
        # release that one as well once it has been saved.
        if drawn is not fig:
            plt.close(drawn)
    finally:
        # pyplot keeps every figure alive until it is closed explicitly, so
        # without this each call would leak one figure.
        plt.close(fig)

    return sklearn_accuracy_score(y_true, y_pred)
Ejemplo n.º 8
0
    def scorer(y_true, y_pred, **kwargs) -> float:
        """Compute accuracy by delegating to ``sklearn_accuracy_score``.

        Parameters
        ----------
        y_true : array-like
            True labels.
        y_pred : array-like
            Predicted labels.
        **kwargs
            Forwarded unchanged to ``sklearn_accuracy_score``.

        Returns
        -------
        float
            The value returned by ``sklearn_accuracy_score``.
        """
        accuracy = sklearn_accuracy_score(y_true, y_pred, **kwargs)
        return accuracy
Ejemplo n.º 9
0
    def testAccuracyScore(self):
        """Check ``accuracy_score`` through a web session against scikit-learn.

        A session is opened on the local endpoint built from ``self.web_port``.
        The score is computed on two seeded random integer columns, fetched
        through that session and compared with ``sklearn_accuracy_score`` on
        the raw NumPy columns.
        """
        endpoint = 'http://127.0.0.1:' + self.web_port
        # Use a 120 timeout when the CI environment variable is set, -1 otherwise.
        if 'CI' in os.environ:
            timeout = 120
        else:
            timeout = -1

        with new_session(endpoint) as sess:
            run_kwargs = {'timeout': timeout}

            rng = np.random.RandomState(0)
            # Draw 'a' before 'b' so the seeded sequence matches the column order.
            col_a = rng.randint(0, 10, (10, ))
            col_b = rng.randint(0, 10, (10, ))
            frame = pd.DataFrame({'a': col_a, 'b': col_b})

            wrapped = md.DataFrame(frame)
            truth = wrapped['a'].to_tensor().astype('int')
            guess = wrapped['b'].astype('int')

            result = accuracy_score(truth, guess,
                                    session=sess, run_kwargs=run_kwargs)

            raw_truth = frame['a'].to_numpy().astype('int')
            raw_guess = frame['b'].to_numpy().astype('int')
            reference = sklearn_accuracy_score(raw_truth, raw_guess)

            self.assertAlmostEqual(result.fetch(session=sess), reference)