Code example #1
def trials(x, Y):
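    # For each of the 10 trials: split the data (the outer train/test split is fixed by random_state=42,
    # the inner train/validation split varies), fit a shallow decision tree, and record accuracy,
    # the 0-1 loss bias/variance decomposition, and precision/recall/F1.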
    accc = np.zeros((10, 1))
    ebv = np.zeros((10, 3))
    prf = np.zeros((10, 3))

    for it in np.arange(10):
        X_train, X_test, y_train, y_test = train_test_split(x,
                                                            Y,
                                                            train_size=0.7,
                                                            random_state=42)
        X_train, X_valid, y_train, y_valid = train_test_split(X_train,
                                                              y_train,
                                                              train_size=0.9)
        #X_test = X_train
        #y_test = y_train
        #X_test = X_valid
        #y_test = y_valid

        treee = DecisionTreeClassifier(max_depth=4,
                                       min_samples_split=500,
                                       min_samples_leaf=450,
                                       max_features=None,
                                       random_state=None,
                                       max_leaf_nodes=10)
        treee.fit(X_train, y_train)
        y_pred = treee.predict(X_test)

        acc = accuracy(y_test, y_pred)
        accc[it] = acc
        #print("Accuracy:", acc)

        mse, bias, var = bias_variance_decomp(treee,
                                              X_train,
                                              y_train,
                                              X_test,
                                              y_test,
                                              loss='0-1_loss',
                                              random_seed=123)
        ebv[it, 0:3] = mse, bias, var
        #print()
        #print('Average Expected Loss: %.3f' % mse)
        #print('Bias: %.3f' % bias)
        #print('Variance: %.3f' % var)

        p = precision_score(y_test, y_pred, average='binary')
        r = recall_score(y_test, y_pred, average='binary')
        f = f1_score(y_test, y_pred, average='binary')
        prf[it, 0:3] = p, r, f
        #print()
        #print('Precision: %.3f' % p)
        #print('Recall: %.3f' % r)
        #print('f1: %.3f' % f)
    print(accc)
    print()
    print(ebv)
    print()
    print(prf)
Code example #2
def test_mse_tree():

    X, y = boston_housing_data()
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.3,
                                                        random_state=123,
                                                        shuffle=True)

    tree = DecisionTreeRegressor(random_state=123)
    avg_expected_loss, avg_bias, avg_var = bias_variance_decomp(
        tree, X_train, y_train, X_test, y_test, loss='mse', random_seed=123)

    assert round(avg_expected_loss, 3) == 31.536
    assert round(avg_bias, 3) == 14.096
    assert round(avg_var, 3) == 17.440
Code example #3
File: bias_variance.py  Project: srisuhas/ML-Thunai
def bv_decomp(all_estimators, X_train, y_train, X_test, y_test):
    print("\n\n<<<<DECOMPOSING THEBIAS AND VARIANCE>>>>\n\n")
    for key, value in all_estimators.items():
        avg_expected_loss, avg_bias, avg_var = bias_variance_decomp(
            value,
            X_train,
            y_train,
            X_test,
            y_test,
            loss='0-1_loss',
            random_seed=123)
        print('Average expected loss for {} is {}'.format(
            key, round(avg_expected_loss, 3)))
        print('Average bias for {} is {}'.format(key, round(avg_bias, 3)))
        print('Average variance for {} is {}'.format(key, round(avg_var, 3)))
        print('\n')
Code example #4
    def bias_variance_decomp(self, X_train, X_test, Y_train, Y_test):
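        # Standardize the features, then decompose the random forest's expected 0-1 loss into average bias and variance.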

        scaler = StandardScaler()
        X_train = scaler.fit_transform(X=X_train)
        X_test = scaler.transform(X_test)
        rf = RandomForestClassifier()

        avg_expected_loss, avg_bias, avg_var = bias_variance_decomp(
            rf, X_train, Y_train, X_test, Y_test, loss='0-1_loss')

        print('Decomposing Bias and Variance of RandomForest')
        print('-------------------------------------------')
        print('Average expected loss: %.3f' % avg_expected_loss)
        print('Average bias: %.3f' % avg_bias)
        print('Average variance: %.3f' % avg_var)
        print('-------------------------------------------')
        return
Code example #5
def test_mse_tree():

    X, y = boston_housing_data()
    X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                        test_size=0.3,
                                                        random_state=123,
                                                        shuffle=True)

    tree = DecisionTreeRegressor(random_state=123)
    avg_expected_loss, avg_bias, avg_var = bias_variance_decomp(
            tree, X_train, y_train, X_test, y_test,
            loss='mse',
            random_seed=123)

    assert round(avg_expected_loss, 3) == 31.917
    assert round(avg_bias, 3) == 13.814
    assert round(avg_var, 3) == 18.102
Code example #6
def test_01_loss_tree():

    X, y = iris_data()
    X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                        test_size=0.3,
                                                        random_state=123,
                                                        shuffle=True,
                                                        stratify=y)

    tree = DecisionTreeClassifier(random_state=123)
    avg_expected_loss, avg_bias, avg_var = bias_variance_decomp(
            tree, X_train, y_train, X_test, y_test,
            loss='0-1_loss',
            random_seed=123)

    assert round(avg_expected_loss, 3) == 0.062
    assert round(avg_bias, 3) == 0.022
    assert round(avg_var, 3) == 0.040
Code example #7
def test_01_loss_tree():

    X, y = iris_data()
    X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                        test_size=0.3,
                                                        random_state=123,
                                                        shuffle=True,
                                                        stratify=y)

    tree = DecisionTreeClassifier(random_state=123)
    avg_expected_loss, avg_bias, avg_var = bias_variance_decomp(
            tree, X_train, y_train, X_test, y_test,
            loss='0-1_loss',
            random_seed=123)

    assert round(avg_expected_loss, 3) == 0.062
    assert round(avg_bias, 3) == 0.022
    assert round(avg_var, 3) == 0.040
Code example #8
def test_mse_bagging():

    X, y = boston_housing_data()
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.3,
                                                        random_state=123,
                                                        shuffle=True)

    tree = DecisionTreeRegressor(random_state=123)
    bag = BaggingRegressor(base_estimator=tree,
                           n_estimators=10,
                           random_state=123)

    avg_expected_loss, avg_bias, avg_var = bias_variance_decomp(
        bag, X_train, y_train, X_test, y_test, loss='mse', random_seed=123)
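    # Compared with the single unbagged tree in the earlier test, bagging trades slightly higher bias for a much lower variance.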

    assert round(avg_expected_loss, 2) == 20.24, avg_expected_loss
    assert round(avg_bias, 2) == 15.63, avg_bias
    assert round(avg_var, 2) == 4.61, avg_var
Code example #9
def test_mse_bagging():

    X, y = boston_housing_data()
    X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                        test_size=0.3,
                                                        random_state=123,
                                                        shuffle=True)

    tree = DecisionTreeRegressor(random_state=123)
    bag = BaggingRegressor(base_estimator=tree,
                           n_estimators=100,
                           random_state=123)

    avg_expected_loss, avg_bias, avg_var = bias_variance_decomp(
            bag, X_train, y_train, X_test, y_test,
            loss='mse',
            random_seed=123)

    assert round(avg_expected_loss, 3) == 18.593
    assert round(avg_bias, 3) == 15.354
    assert round(avg_var, 3) == 3.239
Code example #10
def get_accuracy_with_confusion_matrix(train, y, test, testY, model, classicmodel, nump=False, senti="false"):
    if senti == "add":
        train_senti, test_senti = append_senti_to_vect(train, test)
    train_model = model.fit_transform(train)
    # Fit the vectorizer on the training texts only, then reuse it to transform the test set.
    test_model = model.transform(test)
    if nump:
        train_model = train_model.toarray()
        test_model = test_model.toarray()
    if senti == "add":
        train_model = np.c_[train_model, train_senti]
        test_model = np.c_[test_model, test_senti]
    final_model = classicmodel.fit(train_model, y)
    yhat = final_model.predict(test_model)
    print("Accuracy :", np.mean(yhat == testY))
    print(classification_report(testY, yhat))
    mse, bias, var = bias_variance_decomp(final_model, train_model, y, test_model, testY,
                                          loss='mse', num_rounds=200, random_seed=1)
    print("MSE : " + str(mse))
    print("Bias : " + str(bias))
    print("Variance : " + str(var))

    confusionMatrix(testY, yhat)
    return final_model
Code example #11
def calculate_mse_bias_variance(X, y, test_size):
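    # `pipeline`, `errors`, `biases`, `variances`, `estimator_names`, and `i` are assumed to be defined at module level.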
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=test_size,
                                                        random_state=1)
    mse, bias, var = bias_variance_decomp(pipeline,
                                          X_train,
                                          y_train,
                                          X_test,
                                          y_test,
                                          loss='mse',
                                          num_rounds=200,
                                          random_seed=1)
    errors.append(mse)
    biases.append(bias)
    variances.append(var)
    print('Estimator :', estimator_names[i])
    print('Test Size :', test_size)
    print('MSE: %.3f' % mse)
    print('Bias: %.3f' % bias)
    print('Variance: %.3f' % var)
    print('--------------------------------')
Code example #12
def perform_bias_variance_decomposition(estimator,
                                        x_train,
                                        y_train,
                                        x_test,
                                        y_test,
                                        model_uid,
                                        n_bootstraps=20):
    """
    Decomposes the average loss of a model into bias and variance. Writes out the results locally.

    :param estimator: estimator object
    :param x_train: x_train
    :param y_train: y_train
    :param x_test: x_test
    :param y_test: y_test
    :param model_uid: model uid
    :param n_bootstraps: number of bootstrap samples to take
    """
    x_train = x_train.reset_index(drop=True)
    y_train = y_train.reset_index(drop=True)
    x_test = x_test.reset_index(drop=True)
    y_test = y_test.reset_index(drop=True)
    avg_expected_loss, avg_bias, avg_var = bias_variance_decomp(
        estimator,
        x_train,
        y_train,
        x_test,
        y_test,
        loss='0-1_loss',
        random_seed=1234,
        num_rounds=n_bootstraps)
    pd.DataFrame({
        'avg_expected_loss': [avg_expected_loss],
        'avg_bias': [avg_bias],
        'avg_var': [avg_var]
    }).to_csv(os.path.join('modeling', model_uid, 'diagnostics',
                           'evaluation_files',
                           'bias_variance_decomposition.csv'),
              index=False)
Code example #13
    def biasVarianceTradeOff(self,
                             lossFunction="mse",
                             numRounds=200,
                             display=True):
        """
        The biasVarianceTradeOff public method prints the bias-variance trade-off (i.e. the mean squared error, the bias, and the variance) of a particular fitted classifier.
        [bias_variance_decomp from mlxtend is used](http://rasbt.github.io/mlxtend/user_guide/evaluate/bias_variance_decomp/)

        Parameters
        ----------
        lossFunction: string<"mse", "0-1_loss">
            Loss function passed to the bias_variance_decomp API method.
        numRounds: int range(1, inf) DEFAULT=200
            Number of bootstrap rounds the API performs on the data when evaluating the model.
        display: Boolean DEFAULT=True
            Whether to print the values; if False, the results are only stored on the class for later use.

        Returns
        -------
        (void)
        """
        self.mse, self.bias, self.var = bias_variance_decomp(
            self.model,
            self.X_train,
            self.y_train,
            self.X_test,
            self.y_test,
            loss=lossFunction,
            num_rounds=numRounds,
            random_seed=123)

        # summarize results
        if display:
            print('mse Loss: %.3f' % self.mse)
            print('Bias: %.3f' % self.bias)
            print('Variance: %.3f' % self.var)
            print("Accuracy: %.3f" % self.accuracy)
Code example #14
    def calculate_bias_variance(self):
        """ Calculate bias and variance """
        mse, bias, var = evaluate.bias_variance_decomp(
            self.model,
            np.array(self.data.x_train),
            np.array(self.data.y_train),
            np.array(self.data.x_test),
            np.array(self.data.y_test),
            loss="mse",
            num_rounds=200,
            random_seed=RANDOM_SEED,
        )
        self.output.append({
            "type": "bias_variance",
            "data": {
                "MSE": mse,
                "BIAS": bias,
                "VARIANCE": var,
            },
        })
        print(f"{self.name}: Total Error (MSE, Bias, Variance) = "
              f"({mse}, {bias}, {var})")
Code example #15
def calculate_mse_bias_variance(X, y, test_size):
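    # Note: despite its name, `test_size` is used here as the KNN hyperparameter n_neighbors; the split fraction is fixed at 0.2.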
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.2,
                                                        random_state=1)
    mse, bias, var = bias_variance_decomp(
        KNeighborsRegressor(n_neighbors=test_size),
        X_train,
        y_train,
        X_test,
        y_test,
        loss='mse',
        num_rounds=200,
        random_seed=1)
    errors.append(mse)
    biases.append(bias)
    variances.append(var)
    print('Estimator : KNN Regressor')
    print('Degree :', test_size)
    print('MSE: %.3f' % mse)
    print('Bias: %.3f' % bias)
    print('Variance: %.3f' % var)
    print('--------------------------------')
Code example #16
def pandas_input_fail():

    X, y = iris_data()
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.3,
                                                        random_state=123,
                                                        shuffle=True,
                                                        stratify=y)

    X_train = pd.DataFrame(X_train)

    tree = DecisionTreeClassifier(random_state=123)

    with pytest.raises(ValueError):
        avg_expected_loss, avg_bias, avg_var = bias_variance_decomp(
            tree,
            X_train,
            y_train,
            X_test,
            y_test,
            loss='0-1_loss',
            random_seed=123)
Code example #17
def calculate_mse_bias_variance(X, y, test_size):
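    # Note: `test_size` is used here as the MLP's hidden_layer_sizes; the split fraction is fixed at 0.2.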
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.2,
                                                        random_state=1)
    mse, bias, var = bias_variance_decomp(MLPRegressor(
        hidden_layer_sizes=test_size, max_iter=1000),
                                          X_train,
                                          y_train,
                                          X_test,
                                          y_test,
                                          loss='mse',
                                          num_rounds=200,
                                          random_seed=1)
    errors.append(mse)
    biases.append(bias)
    variances.append(var)
    print('Estimator : Neural Networks')
    print('Test Size :', test_size)
    print('MSE: %.3f' % mse)
    print('Bias: %.3f' % bias)
    print('Variance: %.3f' % var)
    print('--------------------------------')
Code example #18
def bv_decomp_wrapper(model, xtrain, ytrain, xtest, ytest):
    name = Path(model[0]).stem
    modelobject = model[1]
    print(("working on model {}".format(name)))

    avg_expected_loss, avg_bias, avg_var = bias_variance_decomp(
        modelobject,
        xtrain,
        ytrain,
        xtest,
        ytest,
        loss="0-1_loss",
        random_seed=821996,
        num_rounds=100,
    )

    result_dict = {
        "name": name,
        "avg_bias": avg_bias,
        "avg_expected_loss": avg_expected_loss,
        "avg_var": avg_var,
    }

    return result_dict
Code example #19
from mlxtend.evaluate import bias_variance_decomp

import time

start = time.time()
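# `classifier_lgbm` and the train/test arrays are assumed to be defined earlier in the script.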

avg_expected_loss, avg_bias, avg_var = bias_variance_decomp(classifier_lgbm,
                                                            X_train,
                                                            Y_train,
                                                            X_test,
                                                            Y_test,
                                                            loss='0-1_loss',
                                                            random_seed=42)

end = time.time()
print("Tempo de Execução: {:.2f} min".format((end - start) / 60))

print('Average expected loss: %.3f' % avg_expected_loss)
print('Average bias: %.3f' % avg_bias)
print('Average variance: %.3f' % avg_var)
Code example #20
    def SimpleDTandOptimizedDTVarianceDecomp(self):
        """
        The SimpleDTandOptimizedDTVarianceDecomp public method shows the change in variance and bias when moving from
        a simple Decision Tree to an Optimized Decision Tree.
        [Inspired by the docs/tutorials available in scikit-learn](https://scikit-learn.org/stable/auto_examples/index.html)

        The process is as follows:
        - Create the estimator.
        - Evaluate its bias-variance decomposition using mlxtend.
        - Do the two steps above for both the Simple and the Optimized Decision Tree.
        - Display the reduction in variance from the first classifier to the second.
        - Display the bias introduced from the first classifier to the second.

        Parameters
        ----------
        (void)

        Returns
        -------
        (void)
        """
        dt = DecisionTreeClassifier(criterion="entropy", max_depth=2)
        error_dt, bias_dt, var_dt = bias_variance_decomp(dt,
                                                         self.X_train,
                                                         self.y_train,
                                                         self.X_test,
                                                         self.y_test,
                                                         'mse',
                                                         random_seed=123)

        param_dist = {
            "max_depth": range(3, 10),
            "criterion": ["entropy", "gini"],
        }

        OptDt = GridSearchCV(DecisionTreeClassifier(),
                             param_dist,
                             cv=10,
                             n_jobs=-1,
                             return_train_score=True)
        error_dt_pruned, bias_dt_pruned, var_dt_pruned = bias_variance_decomp(
            OptDt,
            self.X_train,
            self.y_train,
            self.X_test,
            self.y_test,
            'mse',
            random_seed=123)

        print("Variance Impact from the first to the second classifier:",
              str(np.round((var_dt_pruned / var_dt - 1) * 100, 2)) + '%')
        print("Bias Impact from the first to the second classifier:",
              str(np.round((bias_dt_pruned / bias_dt - 1) * 100, 2)) + '%')

        # fig, ax = plt.subplots(nrows=1, ncols=2)

        print(var_dt_pruned)
        print(var_dt)
        print(bias_dt_pruned)
        print(bias_dt)

        fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(10, 8))

        algorithms = ['Simple DT', 'Optimised DT']
        biases = [bias_dt, bias_dt_pruned]
        ax[0].bar(algorithms, biases, color='lightblue')
        ax[0].set_ylabel('Bias')
        ax[0].set_title('Bias impact through a simple to an optimised DT')
        ax[0].set_xticks(algorithms)
        ax[0].set_xticklabels(algorithms)
        ax[0].legend(['Bias'])

        variances = [var_dt, var_dt_pruned]
        ax[1].bar(algorithms, variances, color='#69b3a2')
        ax[1].set_ylabel('Variance')
        ax[1].set_title(
            'Variance impact through a simple DT to an optimised DT')
        ax[1].set_xticks(algorithms)
        ax[1].set_xticklabels(algorithms)
        ax[1].legend(['Variance'])

        plt.show()
Code example #21
from sklearn.linear_model import LinearRegression
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split

from mlxtend.evaluate import bias_variance_decomp

# preparing the dataset into inputs (feature matrix) and outputs (target vector)
data = fetch_california_housing()  # fetch the data
X = data.data  # feature matrix
y = data.target  # target vector

# split the data into training and test samples
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

# define the model
model = LinearRegression()

# estimating the bias and variance
avg_expected_loss, avg_bias, avg_var = bias_variance_decomp(model,
                                                            X_train,
                                                            y_train,
                                                            X_test,
                                                            y_test,
                                                            loss='mse',
                                                            num_rounds=50,
                                                            random_seed=20)

# summary of the results
print('Average expected loss: %.3f' % avg_expected_loss)
print('Average bias: %.3f' % avg_bias)
print('Average variance: %.3f' % avg_var)
Code example #22
    print("Accuracy: " + str((TP + TN) / (TP + TN + FP + FN)))
    print("Classification Error: " + str((FP + FN) / (TP + TN + FP + FN)))
    print("Positive Predictive Value: " + str(TP / (TP + FP)))
    print("Demographic Parity: " + str((TP + FP) / (TP + TN + FP + FN)))
    print("False Positive Rate: " + str(FP / (TN + FP)))


evaluate(covered_points, "******Covered******")
evaluate(uncovered_points, "******Uncovered******")

covered_x_test = np.array(covered_x_test)
covered_y_test = np.array(covered_y_test)
uncovered_x_test = np.array(uncovered_x_test)
uncovered_y_test = np.array(uncovered_y_test)
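# Decompose the model's error separately on the covered and the uncovered test subsets.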

mse, bias, var = bias_variance_decomp(clf, x_train_scaled, y_train, covered_x_test, covered_y_test, loss='mse',
                                      num_rounds=200,
                                      random_seed=1)
print("******Covered******")
print('MSE: %.3f' % mse)
print('Bias: %.3f' % bias)
print('Variance: %.3f' % var)

mse, bias, var = bias_variance_decomp(clf, x_train_scaled, y_train, uncovered_x_test, uncovered_y_test, loss='mse',
                                      num_rounds=200,
                                      random_seed=1)
print("******Uncovered******")
print('MSE: %.3f' % mse)
print('Bias: %.3f' % bias)
print('Variance: %.3f' % var)
Code example #23
print('Accuracy Score: ', accuracy_score(y_test, y_pred))  # y_pred is the output

from sklearn.metrics import f1_score

f1_metric = f1_score(y_test, y_pred, average='macro')
print("f1 score macro:", f1_metric)

f1_metric_micro = f1_score(y_test, y_pred, average='micro')
print("f1 score micro:", f1_metric_micro)


# print(tree.plot_tree(classifier))
from mlxtend.evaluate import bias_variance_decomp
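# No loss argument is passed, so bias_variance_decomp falls back to its default loss ('0-1_loss' in mlxtend).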
mse, bias, var = bias_variance_decomp(classifier, X_train, y_train, X_test, y_test, num_rounds=200, random_seed=1)
# summarize results
print('Bias: %.3f' % bias)
print('Variance: %.3f' % var)
from sklearn.model_selection import cross_val_score
# clf = svm.SVC(kernel='linear', C=1)
scores = cross_val_score(classifier, X_features_input, y_label_output, cv=5)
print('Cross Validation')
print(scores)
print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))

#take input from the loaded model
input_sepal_length = float(input("Enter sepal length: "))
input_sepal_width = float(input("Enter sepal width:"))
input_petal_length = float(input("Enter petal Length: "))
input_petal_width = float(input("Enter petal width: "))