def _compare_with_sklearn_mse_regressor_iris(n_estimators=100, sample_weight=None,
                                             description=""):
    """Compare daal4py and scikit-learn forest regressors by test MSE on iris.

    Both regressors are created with the same ``n_estimators``, unlimited
    depth and ``random_state=777``, fitted on the same training split
    (``test_size=0.33``, ``random_state=31``) and scored on the same
    held-out split.

    Parameters
    ----------
    n_estimators : int, default=100
        Number of trees given to both regressors.
    sample_weight : optional, default=None
        Forwarded unchanged to both ``fit`` calls.
    description : str, default=""
        Text prepended verbatim to the assertion message.

    Raises
    ------
    AssertionError
        If ``daal4py_mse / scikit_mse`` is greater than ``MSE_RATIO``.
    """
    x_train, x_test, y_train, y_test = train_test_split(
        IRIS.data, IRIS.target, test_size=0.33, random_state=31)

    scikit_model = ScikitRandomForestRegressor(n_estimators=n_estimators,
                                               max_depth=None,
                                               random_state=777)
    daal4py_model = DaalRandomForestRegressor(n_estimators=n_estimators,
                                              max_depth=None,
                                              random_state=777)

    scikit_predict = scikit_model.fit(
        x_train, y_train, sample_weight=sample_weight).predict(x_test)
    daal4py_predict = daal4py_model.fit(
        x_train, y_train, sample_weight=sample_weight).predict(x_test)

    # mean_squared_error is declared as (y_true, y_pred): ground truth first.
    scikit_mse = mean_squared_error(y_test, scikit_predict)
    daal4py_mse = mean_squared_error(y_test, daal4py_predict)

    # A ratio above 1 means daal4py did worse than scikit-learn on this split.
    ratio = daal4py_mse / scikit_mse
    reason = description + f"scikit_mse={scikit_mse}, daal4py_mse={daal4py_mse}"
    assert ratio <= MSE_RATIO, reason
예제 #2
0
def _test_mse_regressor_sample_weight(weight):
    """Check daal4py vs scikit-learn forest regressor MSE with sample weights.

    The two regressors (100 trees, unlimited depth, ``random_state=777``) are
    fitted on the same weighted iris training split and compared on the
    held-out split. The comparison is repeated ``N_TRIES`` times.

    Parameters
    ----------
    weight : sequence
        ``weight[0]`` holds the sample weights; only its leading entries, one
        per training row, are used. ``weight[1]`` is a label that appears in
        the assertion message only.

    Raises
    ------
    AssertionError
        If ``daal4py_mse / scikit_mse`` is greater than ``MSE_RATIO`` in any
        try.
    """
    # The split is seeded, so it is identical on every try; compute it once.
    x_train, x_test, y_train, y_test = train_test_split(
        IRIS.data, IRIS.target, test_size=0.33, random_state=31)

    # Trim the weights to the number of training rows instead of relying on a
    # hard-coded 100 that only matches this particular dataset/test_size.
    train_weight = weight[0][:len(x_train)]

    for _ in range(N_TRIES):
        scikit_model = ScikitRandomForestRegressor(n_estimators=100,
                                                   max_depth=None,
                                                   random_state=777)
        daal4py_model = DaalRandomForestRegressor(n_estimators=100,
                                                  max_depth=None,
                                                  random_state=777)

        scikit_predict = scikit_model.fit(
            x_train, y_train, sample_weight=train_weight).predict(x_test)
        daal4py_predict = daal4py_model.fit(
            x_train, y_train, sample_weight=train_weight).predict(x_test)

        # mean_squared_error is declared as (y_true, y_pred).
        scikit_mse = mean_squared_error(y_test, scikit_predict)
        daal4py_mse = mean_squared_error(y_test, daal4py_predict)

        ratio = daal4py_mse / scikit_mse
        reason = ("Regression sample weights: sample_weight_type={},"
                  "scikit_mse={}, daal4py_mse={}".format(
                      weight[1], scikit_mse, daal4py_mse))
        assert ratio <= MSE_RATIO, reason
예제 #3
0
def check_regressor_sample_weight(weight):
    """Check daal4py vs scikit-learn forest regressor MSE with sample weights.

    Both regressors use their default settings apart from ``random_state=777``.
    They are fitted on the same weighted iris training split and compared on
    the held-out split. The comparison is repeated ``N_TRIES`` times.

    Parameters
    ----------
    weight : sequence
        ``weight[0]`` is passed as ``sample_weight`` to both ``fit`` calls;
        ``weight[1]`` is a label that appears in the assertion message only.

    Raises
    ------
    AssertionError
        If the daal4py-to-scikit-learn MSE ratio is greater than
        ``CHECK_RATIO_REGRESSOR`` in any try.
    """
    # The split is seeded, so it is identical on every try; compute it once.
    x_train, x_test, y_train, y_test = train_test_split(
        IRIS.data, IRIS.target, test_size=0.33, random_state=31)

    for _ in range(N_TRIES):
        scikit_model = ScikitRandomForestRegressor(random_state=777)
        daal4py_model = DaalRandomForestRegressor(random_state=777)

        scikit_predict = scikit_model.fit(
            x_train, y_train, sample_weight=weight[0]).predict(x_test)
        daal4py_predict = daal4py_model.fit(
            x_train, y_train, sample_weight=weight[0]).predict(x_test)

        # mean_squared_error is declared as (y_true, y_pred).
        scikit_mse = mean_squared_error(y_test, scikit_predict)
        daal4py_mse = mean_squared_error(y_test, daal4py_predict)
        ratio = daal4py_mse / scikit_mse

        # Adjacent literals are used instead of a backslash continuation
        # inside the string, which leaked source indentation into the message.
        # The message keeps its original "accuracy" labels, but the reported
        # values are mean squared errors.
        reason = ('Regression sample weights: sample_weight_type=%s,'
                  'scikit_accuracy=%f, daal4py_accuracy=%f'
                  % (weight[1], scikit_mse, daal4py_mse))
        assert ratio <= CHECK_RATIO_REGRESSOR, reason
예제 #4
0
                               n_classes=n_classes,
                               random_state=777)
    return train_test_split(x,
                            y,
                            random_state=777,
                            test_size=test_size,
                            shuffle=shuffle)


# Estimator instances keyed by class name. The forests are kept small
# (10 trees) and use a fixed seed.
ESTIMATORS = {
    'KNeighborsClassifier': KNeighborsClassifier(n_neighbors=10),
    'DaalRandomForestClassifier': DaalRandomForestClassifier(
        n_estimators=10, random_state=777),
    'DaalRandomForestRegressor': DaalRandomForestRegressor(
        n_estimators=10, random_state=777),
}

# Order codes handed to convert_data (presumably NumPy memory-layout flags:
# 'C' row-major, 'F' column-major -- confirm against convert_data).
ORDERS = ['C', 'F']
# Container constructors handed to convert_data as the target data format.
DATA_FORMATS = [pd.DataFrame, np.array]


def check_data_formats_diff(name):
    x_train, x_test, y_train, y_test = make_dataset()
    alg_results = []
    for data_format in DATA_FORMATS:
        for order in ORDERS:
            x_train_copy = convert_data(x_train.copy(), data_format, order)
            x_test_copy = convert_data(x_test.copy(), data_format, order)
            y_train_copy = convert_data(y_train.copy(), data_format, order)
            y_test_copy = convert_data(y_test.copy(), data_format, order)